diff --git a/llm/config/llama/longlora.json b/llm/config/llama/longlora.json
new file mode 100644
index 000000000000..60ac951084c1
--- /dev/null
+++ b/llm/config/llama/longlora.json
@@ -0,0 +1,36 @@
+{
+    "model_name_or_path": "meta-llama/Meta-Llama-3-8B",
+    "dataset_name_or_path": "./data",
+    "output_dir": "./checkpoints/lora_ckpts",
+    "per_device_train_batch_size": 4,
+    "gradient_accumulation_steps": 4,
+    "per_device_eval_batch_size": 8,
+    "eval_accumulation_steps": 16,
+    "num_train_epochs": 1,
+    "learning_rate": 3e-04,
+    "warmup_steps": 30,
+    "logging_steps": 1,
+    "evaluation_strategy": "epoch",
+    "save_strategy": "epoch",
+    "src_length": 1024,
+    "max_length": 2048,
+    "bf16": true,
+    "fp16_opt_level": "O2",
+    "do_train": true,
+    "do_eval": true,
+    "disable_tqdm": true,
+    "load_best_model_at_end": true,
+    "eval_with_do_generation": false,
+    "metric_for_best_model": "accuracy",
+    "recompute": true,
+    "save_total_limit": 1,
+    "tensor_parallel_degree": 1,
+    "pipeline_parallel_degree": 1,
+    "sharding": "stage1",
+    "lora": true,
+    "zero_padding": false,
+    "use_flash_attention": true,
+    "unified_checkpoint": true,
+    "pissa": false,
+    "use_mora": false
+}
diff --git a/llm/run_finetune.py b/llm/run_finetune.py
index 616eb8a8e17a..026f360817b4 100644
--- a/llm/run_finetune.py
+++ b/llm/run_finetune.py
@@ -66,6 +66,7 @@
     Qwen2MoeForCausalLMPipe,
 )
 from paddlenlp.transformers.configuration_utils import LlmMetaConfig
+from paddlenlp.transformers.longlora import replace_llama_attn, set_group_size
 from paddlenlp.trl import DataConfig, ModelConfig, SFTConfig, SFTTrainer
 from paddlenlp.trl.llm_utils import (
     ZeroPaddingIterDatasetCallback,
@@ -168,6 +169,13 @@ def main():
         quantization_config=quantization_config,
     )
 
+    # Enable Shifted Sparse Attention (LongLoRA) before the model is instantiated.
+    if training_args.use_ssa:
+        if training_args.ssa_group_size_ratio is None:
+            raise ValueError("ssa_group_size_ratio must be specified when use_ssa is True")
+        set_group_size(training_args.ssa_group_size_ratio)
+        replace_llama_attn()
+
     architectures_to_check = {"Qwen2Moe",
"DeepseekV2", "DeepseekV3"}
     if (
         any(architecture in str(model_config.architectures) for architecture in architectures_to_check)
@@ -192,6 +200,13 @@ def main():
     model_config.fuse_attention_ffn = model_args.fuse_attention_ffn
     model_config.seq_length = data_args.max_length
 
+    orig_ctx_len = getattr(model_config, "max_position_embeddings", None)
+    # Only derive a RoPE scaling factor when the model actually declares a
+    # context window and the target length exceeds it. The unguarded
+    # `max_length // orig_ctx_len` raises TypeError when the attribute is
+    # missing (None) and yields a useless factor of 0 when max_length is
+    # shorter than the pretrained context.
+    if orig_ctx_len and data_args.max_length > orig_ctx_len:
+        model_args.rope_scaling_factor = data_args.max_length // orig_ctx_len
 
     # Config for model useing long sequence strategy
     if model_args.use_long_sequence_strategies:
diff --git a/llm/utils/data.py b/llm/utils/data.py
index dbecb49778e6..352323717b4a 100644
--- a/llm/utils/data.py
+++ b/llm/utils/data.py
@@ -84,7 +84,7 @@ def tokenize_unsupervised_example(tokenizer, example, data_args, is_test=True, z
         source,
         truncation=False,
         padding=True,
-        max_length=data_args.scaled_max_length,
+        max_length=data_args.src_length,
         add_special_tokens=True,
     )
 
diff --git a/paddlenlp/transformers/longlora.py b/paddlenlp/transformers/longlora.py
new file mode 100644
index 000000000000..1613a6b924a8
--- /dev/null
+++ b/paddlenlp/transformers/longlora.py
@@ -0,0 +1,135 @@
+# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import math
+
+import paddle
+import paddle.nn.functional as F
+
+import paddlenlp
+from paddlenlp.transformers.llama.modeling import get_triangle_upper_mask
+
+ssa_group_size_ratio = 1 / 4
+
+
+def shift(qkv, bsz, q_len, group_size, num_heads, head_dim):
+    assert qkv.shape == [bsz, num_heads, q_len, head_dim], "qkv shape does not match expected shape"
+    # Calculate the shift amount for rolling (negative: roll towards the sequence start)
+    shift_amount = -group_size // 2
+    # Roll only the second half of the heads along the sequence axis (LongLoRA SSA)
+    qkv[:, num_heads // 2 :] = qkv[:, num_heads // 2 :].roll(shift_amount, axis=2)
+
+    # Reshape so each attention group becomes its own batch entry
+    qkv = qkv.reshape([bsz * (q_len // group_size), group_size, num_heads, head_dim])
+    return qkv
+
+
+def ssa_scaled_dot_product_attention(
+    query_states,
+    config,
+    key_states,
+    value_states,
+    attention_mask,
+    output_attentions,
+    alibi=None,
+    sequence_parallel=False,
+    reshard_layer=None,
+    **kwargs
+):
+    bsz, q_len, num_heads, head_dim = query_states.shape
+    if config.context_parallel_degree > 1:
+        raise ValueError("Context parallel requires `use_flash_attention=True`")
+    # [bsz, seq_len, num_heads, head_dim] -> [bsz, num_heads, seq_len, head_dim]
+    query_states = paddle.transpose(query_states, [0, 2, 1, 3])
+    # same transpose for key/value, kept separate for readability
+    key_states = paddle.transpose(key_states, [0, 2, 1, 3])
+    value_states = paddle.transpose(value_states, [0, 2, 1, 3])
+    assert ssa_group_size_ratio is not None, "ssa_group_size_ratio must provide"
+
+    # Group size derived from sequence length; fall back to full attention when the ratio rounds to 0
+    group_size = q_len if int(q_len * ssa_group_size_ratio) == 0 else int(q_len * ssa_group_size_ratio)
+    assert q_len % group_size == 0, f"q_len {q_len} must be divisible by group size {group_size}."
+
+    num_group = q_len // group_size
+
+    # Apply the half-head shift and per-group batching to query, key and value
+    query_states = shift(query_states, bsz, q_len, group_size, num_heads, head_dim)
+    key_states = shift(key_states, bsz, q_len, group_size, num_heads, head_dim)
+    value_states = shift(value_states, bsz, q_len, group_size, num_heads, head_dim)
+    query_states = paddle.transpose(query_states, [0, 2, 1, 3])
+    key_states = paddle.transpose(key_states, [0, 2, 1, 3])
+    value_states = paddle.transpose(value_states, [0, 2, 1, 3])
+    # matmul and divide by sqrt(head_dim)
+    attn_weights = paddle.matmul(query_states / math.sqrt(head_dim), key_states.transpose([0, 1, 3, 2]))
+
+    # then add alibi bias
+    if alibi is not None:
+        alibi = alibi.reshape([bsz, num_heads, 1, -1])
+        attn_weights = attn_weights + alibi
+    if paddle.in_dynamic_mode() and attn_weights.shape != [bsz * num_group, num_heads, group_size, group_size]:
+        raise ValueError(
+            f"Attention weights should be of shape {(bsz * num_group, num_heads, group_size, group_size)}, but is"
+            f" {attn_weights.shape}"
+        )
+
+    # In sep mode, the attention mask should be created at runtime.
+    if reshard_layer is not None:
+        attention_mask = None
+
+    if attention_mask is None:
+        attention_mask = get_triangle_upper_mask(attn_weights)
+    attention_mask = paddle.tile(
+        paddle.cast(attention_mask[:, :, :group_size, :group_size], dtype="float32"), [num_group, 1, 1, 1]
+    )
+
+    if attention_mask.shape != [bsz * num_group, 1, group_size, group_size]:
+        attention_mask = attention_mask[: bsz * num_group, :, :, :]
+
+    attn_weights = attn_weights + attention_mask
+    if not paddle.in_dynamic_mode():
+        attn_weights = F.softmax(attn_weights, axis=-1, dtype="float32").astype(query_states.dtype)
+    else:
+        with paddle.amp.auto_cast(False):
+            attn_weights = F.softmax(attn_weights, axis=-1, dtype="float32").astype(query_states.dtype)
+
+    attn_output = paddle.matmul(attn_weights, value_states)
+    attn_output = attn_output.transpose([0, 2, 1, 3])
+
+    # shift back: undo the half-head roll so the original token order is restored
+    attn_output = attn_output.reshape([bsz, q_len, num_heads, head_dim])
+    attn_output[:, :, num_heads // 2 :] = attn_output[:, :, num_heads // 2 :].roll(group_size // 2, axis=1)
+
+    if reshard_layer is not None:
+        attn_output = reshard_layer(
+            attn_output,
+            split_axis=1,
+            concat_axis=2,
+        )
+        q_len = q_len // config.sep_parallel_degree
+        num_heads = num_heads * config.sep_parallel_degree
+
+    if sequence_parallel:
+        attn_output = attn_output.reshape([bsz * q_len, head_dim * num_heads])
+    else:
+        attn_output = attn_output.reshape([bsz, q_len, head_dim * num_heads])
+    return (attn_output, attn_weights) if output_attentions else attn_output
+
+
+def set_group_size(group_size_ratio):
+    global ssa_group_size_ratio
+    ssa_group_size_ratio = group_size_ratio
+
+
+def replace_llama_attn():
+    paddlenlp.transformers.llama.modeling.scaled_dot_product_attention = ssa_scaled_dot_product_attention
diff --git a/paddlenlp/trl/sft_config.py b/paddlenlp/trl/sft_config.py
index 283244efbb5b..802f401ebc83 100644
--- a/paddlenlp/trl/sft_config.py
+++ b/paddlenlp/trl/sft_config.py
@@ -62,6 +62,18 @@ class SFTConfig(TrainingArguments):
model_init_kwargs: Optional[dict[str, Any]] = None dataset_kwargs: Optional[dict[str, Any]] = None eval_packing: Optional[bool] = None + use_ssa: bool = field( + default=False, + metadata={ + "help": "Whether to use Shifted Sparse Attention (SSA), an efficient attention mechanism introduced in the LongLoRA paper." + }, + ) + ssa_group_size_ratio: float = field( + default=0.25, + metadata={ + "help": "The ratio parameter for grouping in SSA, controlling the number of tokens considered in each group for sparse attention calculation." + }, + ) def __post_init__(self): super().__post_init__() diff --git a/pyproject.toml b/pyproject.toml index db0d1a14946a..3c1f51813d50 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -13,23 +13,7 @@ minversion = "6.0" addopts = "-ra -q " pythonpath = ["."] testpaths = [ - "tests/data", - "tests/dataaug", - "tests/datasets", - "tests/embeddings", - "tests/experimental", - "tests/generation", - "tests/layers", - "tests/metrics", - "tests/pose", - "tests/ops", - "tests/trainer", - "tests/transformers", - "tests/peft", - "tests/prompt", - "tests/mergekit", - # "tests/taskflow", TODO (paddle 2.5.1 breaks this test suite, debug later) - "tests/utils", + "tests/longlora", ] python_files = [ "test.py", diff --git a/tests/fixtures/llm/autoregressive_data/dev.json b/tests/fixtures/llm/autoregressive_data/dev.json new file mode 100644 index 000000000000..91cfd67bb07a --- /dev/null +++ b/tests/fixtures/llm/autoregressive_data/dev.json @@ -0,0 +1,2 @@ +{"src": "Are you a resident of Pinnacle who owns a small business and operates from your home?\nCan you provide a service to your fellow residents of Pinnacle? 
If you've answered yes to both of these questions, supply your details below and we will list your business on our site.\nResidents of Pinnacle, support your local community by checking here first and seeing whether one of your neighbours can assist."} +{"src": "On October 27, 2016 GreenWorks led a tour for the College of Architecture and Landscape Architecture of Beijing University and staff of Landscape Architecture Frontiers publication. This tour group was particularly interested in technical issues related to: soil/vegetation approaches for water quality treatment; the ultra- violet finishing treatment that allows for human contact with the treated water; and soil capping issues for a former brownfield site. GreenWorks typically leads 4-6 tours per year since Tanner Springs Park opened in 2005. Tour groups have included national and international professional and environmental organizations and academic institutions. Visitors are interested in a variety of issues, including design inspiration, public involvement and outreach, and technical challenges.\nCouch Park comment forms for the playground and plaza improvements are due Thursday December 10th.\nGreenWorks was been hired by Portland Parks & Recreation to design the new playground, address accessibility issues in the plaza and install a new Portland Loo at Couch Park as part of the Parks Replacement Bond. We presented the three options below for the playground at an Open House on December 3rd . Online comments are due Thursday December 10th and can be found here: http://www.portlandoregon.gov/parks/68915. One of the top priorities for the playground is for it to be inclusive, which mean that it should be designed for children of all ages and abilities. We have been working closely with Mara Kaplan from Let Kids Play http://www.letkidsplay.com/ who is a national expert and advocate for inclusive playground design. 
Mara was brought on to the design team to help us design a playground that provides exceptional play opportunities for all children.\nGreenWorks met with the city of Astoria to present the Downtown Astoria Pedestrian Wayfinding Concept Plan. Those in attendance were city officials, focus group members, and Astoria community members. The presentation focused on distinct sign typologies that direct and inform pedestrians getting around downtown Astoria. Following the presentation was an interactive group discussion about sign locations, aesthetic preferences interpretive sign opportunities."} \ No newline at end of file diff --git a/tests/fixtures/llm/autoregressive_data/train.json b/tests/fixtures/llm/autoregressive_data/train.json new file mode 100644 index 000000000000..f3241c9fff6e --- /dev/null +++ b/tests/fixtures/llm/autoregressive_data/train.json @@ -0,0 +1,20 @@ +{"src": "Are you a resident of Pinnacle who owns a small business and operates from your home?\nCan you provide a service to your fellow residents of Pinnacle? If you've answered yes to both of these questions, supply your details below and we will list your business on our site.\nResidents of Pinnacle, support your local community by checking here first and seeing whether one of your neighbours can assist."} +{"src": "On October 27, 2016 GreenWorks led a tour for the College of Architecture and Landscape Architecture of Beijing University and staff of Landscape Architecture Frontiers publication. This tour group was particularly interested in technical issues related to: soil/vegetation approaches for water quality treatment; the ultra- violet finishing treatment that allows for human contact with the treated water; and soil capping issues for a former brownfield site. GreenWorks typically leads 4-6 tours per year since Tanner Springs Park opened in 2005. Tour groups have included national and international professional and environmental organizations and academic institutions. 
Visitors are interested in a variety of issues, including design inspiration, public involvement and outreach, and technical challenges.\nCouch Park comment forms for the playground and plaza improvements are due Thursday December 10th.\nGreenWorks was been hired by Portland Parks & Recreation to design the new playground, address accessibility issues in the plaza and install a new Portland Loo at Couch Park as part of the Parks Replacement Bond. We presented the three options below for the playground at an Open House on December 3rd . Online comments are due Thursday December 10th and can be found here: http://www.portlandoregon.gov/parks/68915. One of the top priorities for the playground is for it to be inclusive, which mean that it should be designed for children of all ages and abilities. We have been working closely with Mara Kaplan from Let Kids Play http://www.letkidsplay.com/ who is a national expert and advocate for inclusive playground design. Mara was brought on to the design team to help us design a playground that provides exceptional play opportunities for all children.\nGreenWorks met with the city of Astoria to present the Downtown Astoria Pedestrian Wayfinding Concept Plan. Those in attendance were city officials, focus group members, and Astoria community members. The presentation focused on distinct sign typologies that direct and inform pedestrians getting around downtown Astoria. Following the presentation was an interactive group discussion about sign locations, aesthetic preferences interpretive sign opportunities."} + {"src": "For the rider who\u2019s ready to learn it all, our value bundles offer you the benefit of all the DVDs for your Harley-Davidson\u00ae Touring model at our best prices. You\u2019ll get the Maintenance DVDs Part 1 & 2, the Bolt-On Performance Edition, the Touring Rear Belt Replacement and Touring Oil Cooler Install DVD sets. 
That\u2019s over 15 Hours of detailed procedures!\nThis educational do-it-yourself Touring maintenance DVD covers all brands of Harley-Davidson\u00ae Touring motorcycles. From late 1984 \u2013 2016 (Evo\u00ae, Twin Cam\u00ae 88, 96, 103, 110) we have your Road Kings, Ultras, Electras, and Road/Street Glides covered.\nThis DVD will pay for itself with just one oil change.\nWhen we released the original Fix My Hog\u00ae maintenance DVDs, we couldn\u2019t have anticipated the reaction from critics and customers alike-it went so far beyond our wildest expectations. Since then, letters, calls and e-mails have poured in from around the world suggesting additional topics we should cover, so that\u2019s what we did. These follow up DVDs cover new procedures and technological advances (Twin Cam\u00ae 88, 96, 103, 110) and are a great complement to the original versions. A great way to Fix My Hog and save money in today\u2019s economy. *Note we do not cover the Rushmore changes on this DVD set.\nThis DVD series is designed to show riders how to enhance the look and performance of their Touring model motorcycles. Taped in a professional motorcycle repair shop, the trained mechanics perform and explain each procedure in detail. The Bolt-On Performance and Accessory DVDs feature footage of installations applicable to Evo\u00ae, Twin Cam\u00ae 88, 96, 103 & 110 motorcycles. This three-DVD set is crammed with more than six hours of valuable instruction, interviews and insider tips.\nThe Form-A-Funnel easily molds, then holds its shape so you get every last drop of oil every time, no matter how obstructed the filter or plug.\nMolds into and holds any shape to create a leakproof channel for draining oil or fluid.\nPrecise, reliable sealing from Loctite.\nFormulated to work like Loctite Hi-Tack Liquids, these wax-like sticks are less messy, have a low odor, and are solvent-free. 
Loctite Hi-Tack sticks set up quickly to a very tacky film, to seal and hold the heaviest gaskets in place. They resist gasoline, oil, and common shop fluids. Suggested applications include valve covers, fuel pumps, carburetors, and manifold gaskets."} + {"src": "France Music Charts List of all local music charts served by Popnable.\nMost viewed music videos from France, ranked on daily basis.\nMost viewed music videos from France, ranked on weekly, monthly and yearly basis.\nMost viewed artists from France, ranked on monthly and yearly basis.\nMost liked songs from France, ranked on weekly basis."} +{"src": "I am a conservation biologist and molecular ecologist, interested in understanding the ecological and evolutionary processes that generate and maintain diversity within and among populations. The primary motivation for my work is to apply this fundamental understanding of biology to solve pressing problems in conservation and management.\nOutreach: Dr. Meek is a member of the IUCN North American Conservation Genetics Specialist Group, Vice President of the Society for Conservation Biology Conservation Genetics Working Group, and a member of the Interagency Ecological Program Salmonid Genetics Project Work Team.\nDr. Meek is on the editorial board of Conservation Science and Practice.\nMy research focuses on the use of genetic tools to study population- and species-level relationships in marine and freshwater fishes. The primary motivation for this work is to improve our understanding of evolutionary processes in aquatic environments, and to provide practical information for management and conservation. Find out more about my work on my website.\nI am interested in conservation biology and the effects of climate change on populations. I completed my M.A. 
in Biology at Buffalo State College studying the metagenomic diversity of fungal communities in and outside of ant nests, and I am excited to study conservation genomics and diversity in the Meek lab.\nMy research interests center around the population dynamics and genetics of aquatic species, as well as overall species diversity in aquatic environments. Specifically, I am interested in the mechanisms of population divergence and the barriers to gene flow. I am primarily concerned with how these patterns and processes are relevant for conservation and management.\nI study conservation genetics and am interested in small population conservation. My research centers around discovering impacts on populations from fragmentation, especially due to anthropogenic change. I hope to use my research to help inform future management actions and to create public outreach programs.\nI am an Integrative Biology master\u2019s student at Michigan State University. I recently graduated from the University of Arizona, receiving a B.S. in Organismal Biology. While at the University of Arizona, I was involved in several projects working with human and plant pathogens, and was president of the Criminal Justice Association. I am a Department of Defense SMART Scholar, and will work for the U.S. Army Defense Forensic Science Center upon graduation. I am interested in the next generation sequencing applications to forensic science.\nSierra is co-advised by Dr. Benbow in the Department of Entomology: https://ericbenbow.wixsite.com/website.\nDo not hesitate to contact Sierra with any questions: kaszubin[at]msu.edu.\nI\u2019m a senior undergraduate at Michigan State University, majoring in zoology with a concentration in ecology, evolution, and organismal biology. 
My research interests are in the genetics and evolution of behavior, and my goal is to attend graduate school to pursue a PhD in evolutionary biology.\nEllery studies the diversity of tastes in her environment and is on a quest to identify the most crinkly material in the world. She is also researching the ecology and behavior of the Giant Schnauzer.\nChai\u2019s research interests are in the behavioral ecology of small mammals, with a particular focus on trying to understand the adaptations that allow squirrels to successfully avoid canine predation."} +{"src": "Dr Wiffen is a graduate of University of Queensland Medical School and trained in ophthalmology in Western Australia before undertaking two-year fellowships in cornea and refractive surgery at both the Corneo-Plastic Unit, East Grinstead, UK, and the Mayo Clinic, Rochester, Minnesota, USA. He was Director of the Corneo-Plastic Unit and Eye Bank in East Grinstead from 1993-1994. He has been a Consultant Ophthalmologist at Fremantle Hospital since 1997 and was Associate Professor in the Centre for Ophthalmology and Visual Science, UWA, from 1997-2014. He has been Director of the Lions Eye Bank of Western Australia since 1997. Dr Wiffen has held numerous other positions, including Head of Department of Ophthalmology Fremantle Hospital, Chair of the Qualifications and Education Committee of the WA Branch of RANZCO, Chair of Eye Banks Australia and New Zealand and Chair of the Cornea Standing Committee of the Transplantation Society of Australia and New Zealand. 
He has special expertise in corneal transplantation, pterygium and cataract surgery as well as refractive surgery.\nFellowships at the Corneo-Plastic Unit, East Grinstead, UK, and at the Mayo Clinic, Rochester, Minnesota, USA.\nOcular surface disorders, corneal and refractive surgery, anterior segment disorders & surgery."} +{"src": "Contact us by filling out the form below and we will contact you back shortly.\nHOW DID YOU FIND US ONLINE?\nMiami, FL 33222 USA Ana Jim\u00e9nez Dermocosm\u00e9tica, SL."} +{"src": "Learn how to create a proactive infection prevention (IP) plan based on a comprehensive infection control (IC) risk assessment\u2014a perpetual document that serves as the foundation of your program. Look at techniques for evaluating the actual risk factors for your population, the services you provide and geographic and community-based trends in your region. Return home with the know-how you need to conduct an IC risk analysis that can help you improve the effectiveness of your IP plan by better prioritizing your prevention strategies."} +{"src": "1. Falken Tyre Fuel Promotion (the \"Promotion\") is open to all consumers who are UK residents aged 18 and over, except ALLCARZ (the \"Promoter\") employees, its agencies or anyone else connected with the creation and administration of this Promotion, or trade customers, including tyre dealer's employees. This offer is open to retail customers only making a qualifying purchase online.\n5. This offer applies to the online purchase of 2 \u2013 4 Falken Tyres between 06.02.19 and 31.03.19 online on www.oswestrytyrescentre.co.uk using the valid discount code FALKENFUEL.\n13. ALLCARZ reserve the right to vary or amend these terms and conditions or to withdraw the promotion at any time."} +{"src": "How to Take Meeting Minutes General Overview of Meeting Minutes Generally, minutes begin with the name of the body (e.g. 
a committee) holding the meeting, place, date, list of people present, and the time that the chair called the meeting to order.... Structured vs. Informal. If your PTA follows Robert's Rules of Order-- a process of proposing, discussing and voting on motions -- you should find it much easier to take meeting minutes, as all major actions will be organized by motions and the resulting votes.\nDownload our Meeting Minute Checklist for Associations and Nonprofits with sample minutes taken at a meeting and learn how to take better minutes. 3. The Minutes Writing Process... Taking minutes is important for virtually any meeting. Despite the importance of this task, it is not easy especially if you are not well prepared for it. Fortunately, with the following tips, anyone can effectively take minutes.\nStructured vs. Informal. If your PTA follows Robert's Rules of Order-- a process of proposing, discussing and voting on motions -- you should find it much easier to take meeting minutes, as all major actions will be organized by motions and the resulting votes.... How to Take Meeting Minutes General Overview of Meeting Minutes Generally, minutes begin with the name of the body (e.g. a committee) holding the meeting, place, date, list of people present, and the time that the chair called the meeting to order.\nTaking minutes is important for virtually any meeting. Despite the importance of this task, it is not easy especially if you are not well prepared for it. 
Fortunately, with the following tips, anyone can effectively take minutes."} +{"src": "A forum member of ModMyI claims that he was not able to jailbreak his iPad that he had bought recently using Redsn0w.\nThis has led to speculation that Apple might have patched the limera1n exploit that has been used in Redsn0w to jailbreak iOS 4.2.1 in newer iPhones, iPads and iPod Touches that are being shipped.\nThe purpose of this guide is to provide Mac users step by step instructions to jailbreak iPad using limera1n.\nMuscleNerd of the iPhone Dev Team has just announced via Twitter that they\u2019ve figured out a way to use the PwnageTool, limera1n exploit and Comex\u2019s kernel hacks to jailbreak future firmwares (so iOS 4.2 and beyond) for all A4 chip based devices such as iPhone 4, iPod Touch 4G, iPad and new Apple TV.\nIt looks like the cat and mouse game between Apple and the iPhone hacking community is going to be even more interesting with this breakthrough.\nGeohot has just released limera1n for Mac users that can jailbreak iPhone 3GS, iPhone 4, iPad, iPod Touch 3G and the new iPod Touch 4G.\nGeohot had released limera1n for Windows users over the weekend, which had become very popular with users.\nGeohot has just announced that limera1n is out of beta and limera1n RC1b is available for download.\nIf you had problems jailbreaking your iPhone 3GS, iPhone 4, iPad, iPod Touch 3G and the new iPod Touch 4G using limera1n then try it again using the latest version of limera1n as many readers have reported that the new version has fixed the problems they were facing with the previous version (especially iPad users).\nGeohot who had abandoned the jailbreaking scene in July surprised everyone by releasing limera1n a day before greenpois0n was released by the Chronic Dev Team.\niPhone Dev Team and Chronic Dev Team who were hit the hardest as they have been working hard to release greenpois0n have commented on Geohot\u2019s limera1n."} +{"src": "Since the beginning, Sensory 
has been a pioneer in advancing AI technologies for consumer electronics. Not only did Sensory implement the first commercially successful speech recognition chip, but we also were first to bring biometrics to low cost chips, and speech recognition to Bluetooth devices. Perhaps what I am most proud of though, more than a decade ago Sensory introduced its TrulyHandsfree technology and showed the world that wakeup words could really work in real devices, getting around the false accept and false reject, and power consumption issues that had plagued the industry. No longer did speech recognition devices require button presses\u2026and it caught on quickly!\nLet me go on boasting because I think Sensory has a few more claims to fame\u2026 Do you think Apple developed the first \u201cHey Siri\u201d wake word? Did Google develop the first \u201cOK Google\u201d wake word? What about \u201cHey Cortana\u201d? I believe Sensory developed these initial wake words, some as demos and some shipped in real products (like the Motorola MotoX smartphone and certain glasses). Even third-party Alexa and Cortana products today are running Sensory technology to wake up the Alexa cloud service.\nSensory\u2019s roots are in neural nets and machine learning. I know everyone does that today, but it was quite out of favor when Sensory used machine learning to create a neural net speech recognition system in the 1990\u2019s and 2000\u2019s. Today everyone and their brother is doing deep learning (yeah that\u2019s tongue in cheek because my brother is doing it too! (http://www.cs.colorado.edu/~mozer/index.php). And a lot of these deep learning companies are huge multi-billion-dollar business or extremely well-funded startups.\nSo, can Sensory stay ahead now and continuing pioneering innovation in AI now that everyone is using machine learning and doing AI? Of course, the answer is yes!\nSensory is now doing computer vision with convolutional neural nets. 
We are coming out with deep learning noise models to improve speech recognition performance and accuracy, and are working on small TTS systems using deep learning approaches that help them sound lifelike. And of course, we have efforts in biometrics and natural language that also use deep learning.\nWe are starting to combine a lot of technologies together to show that embedded systems can be quite powerful. And because we have been around longer and thought through most of these implementations years before others, we have a nice portfolio of over 3 dozen patents covering these embedded AI implementations. Hand in hand with Sensory\u2019s improvements in AI software, companies like ARM, NVidia, Intel, Qualcomm and others are investing and improving upon neural net chips that can perform parallel processing for specialized AI functions, so the world will continue seeing better and better AI offerings on \u201cthe edge\u201d.\nWhat Makes the Latest Version of TrulySecure so Different?\nA key measure of any biometric system is the inherent accuracy of the matching algorithm. Earlier attempts at face recognition were based on traditional computer vision (CV) techniques. The first attempts involved measuring key distances on the face and comparing those across images, from which the idea of the number of \u201cfacial features\u201d associated with an algorithm was born. This method turned out to be very brittle however, especially as the pose angle or expression varied. The next class of algorithms involved parsing the face into a grid, and analyzing each section of the grid individually via standard CV techniques, such as frequency analysis, wavelet transforms, local binary patterns (LBP), etc. Up until recently, these constituted the state of the art in face recognition. Voice recognition has a similar history in the use of traditional signal processing techniques.\nSensory\u2019s TrulySecure uses a deep learning approach in our face and voice recognition algorithms. 
Deep learning (a subset of machine learning) is a modern variant of artificial neural networks, which Sensory has been using since the very beginning in 1994, and thus we have extensive experience in this area. In just the last few years, deep learning has become the primary technology for many CV applications, and especially face recognition. There have been recent announcements in the news by Google, Facebook, and others on face recognition systems they have developed that outperform humans. This is based on analyzing a data set such as Labeled Faces in the Wild, which has images captured over a very wide ranging set of conditions, especially larger angles and distances from the face. We\u2019ve trained our network for the authentication case, which has a more limited range of conditions, using our large data set collected via AppLock and other methods. This allows us to perform better than those algorithms would do for this application, while also keeping our size and processing power requirements under control (the Google and Facebook deep learning implementations are run on arrays of servers).\nOne consequence of the deep learning approach is that we don\u2019t use a number of points on the face per se. The salient features of a face are compressed down to a set of coefficients, but they do not directly correspond to physical locations or measurements of the face. Rather these \u201cfeatures\u201d are discovered by the algorithm during the training phase \u2013 the model is optimized to reduce face images to a set of coefficients that efficiently separate faces of a particular individual from faces of all others. 
This is a much more robust way of assessing the face than the traditional methods, and that is why we decided to utilize deep learning opposed to CV algorithms for face recognition.\nSensory has also developed a great deal of expertise in making these deep learning approaches work in limited memory or processing power environments (e.g., mobile devices). This combination creates a significant barrier for any competitor to try to switch to a deep learning paradigm. Optimizing neural networks for constrained environments has been part of Sensory\u2019s DNA since the very beginning.\nOne of the most critical elements to creating a successful deep learning based algorithm such as the ones used in TrulySecure is the availability of a large and realistic data set. Sensory has been amassing data from a wide array of real world conditions and devices for the past several years, which has made it possible to train and independently test the TrulySecure system to a high statistical significance, even at extremely low FARs.\nIt is important to understand how Sensory\u2019s TrulySecure fuses the face and voice biometrics when both are available. We implement two different combination strategies in our technology. In both cases, we compute a combined score that fuses face and voice information (when both are present). Convenience mode allows the use of either face or voice or the combined score to authenticate. TrulySecure mode requires both face and voice to match individually.\nMore specifically, Convenience mode checks for one of face, voice, or the combined score to pass the current security level setting. It assumes a willingness by the user to present both biometrics if necessary to achieve authentication, though in most cases, they will only need to present one. For example, when face alone does not succeed, the user would then try saying the passphrase. 
In this mode the system is extremely robust to environmental conditions, such as relying on voice instead of face when the lighting is very low. TrulySecure mode, on the other hand, requires that both face and voice meet a minimum match requirement, and that the combined score passes the current security level setting.\nTrulySecure utilizes adaptive enrollment to improve FRR with virtually no change in FAR. Sensory\u2019s Adaptive Enrollment technology can quickly enhance a user profile from the initial single enrollment and dramatically improve the detection rate, and is able to do this seamlessly during normal use. Adaptive enrollment can produce a rapid reduction in the false rejection rate. In testing, after just 2 adaptations, we have seen almost a 40% reduction in FRR. After 6 failed authentication attempts, we see more than 60% reduction. This improvement in FRR comes with virtually no change in FAR. Additionally, adaptive enrollment alleviates the false rejects associated with users wearing sunglasses, hats, or trying to authenticate in low-light, during rapid motion, challenging angles, with changing expressions and changing facial hair.\nComments Off on What Makes the Latest Version of TrulySecure so Different?\nComments Off on Will passports one day be secured with biometrics?\nRich Nass and Barbara Quinlan from Open Systems Media visited Sensory on their \u201cIoT Roadshow\u201d.\nI started our meeting off by talking about Sensory\u2019s three products \u2013 TrulyHandsfree Voice Control, TrulySecure Authentication, and TrulyNatural large vocabulary embedded speech recognition.\nAlthough TrulyHandsfree is best known for its \u201calways on\u201d capabilities, ideal for listening for key phrases (like OK Google, Hey Cortana, and Alexa), it can be used a ton of other ways. One of them is for hands-free photo taking, so no selfie stick is required. To demonstrate, I put my camera on the table and took pictures of Barbara and Rich. 
(Normally I might have joined the pictures, but their healthy hair, naturally good looks, and formal attire was too outclassing for my participation).\nThere\u2019s a lot of hype about IoT and Wearables and I\u2019m a big believer in both. That said, I think Amazon\u2019s Echo is the perfect example of a revolutionary product that showcases the use of speech recognition in the IoT space and am looking forward to some innovative uses of speech in Wearables!\nHere\u2019s the article they wrote on their visit to Sensory and an impromptu video showing TrulyNatural performing on-device navigation, as well as a demo of TrulySecure via our AppLock Face/Voice Recognition app.\nIf you\u2019re an IoT device that requires hands-free operation, check out Sensory, just like I did while I was OpenSystems Media\u2019s IoT Roadshow. Sensory\u2019s technology worked flawlessly running through the demo, as you can see in the video. We ran through two different products, one for input and one for security."} +{"src": "A safe knife efficient at cutting through bale twine, the blade is not exposed making it safe for use on the yard.\nVery useful and safe knife.\nVery handy tool for the yard.\nI have no used this yet."} +{"src": "lorazepam nombre comercial mexico From an integrated treatment model in hospitals, neither has been disorder and substance abuse disorder predictor of severe withdrawal, she. Using models for chronic pain concerns and finding lorazepam nombre comercial mexico if tongue or throat or hallucinations 2018 Search Speak to an a part of responsible health. Want to consider covering your lorazepam nombre comercial mexico Dry mouth Weakness. Also comes lorazepam nombre comercial mexico ciwa-ar score reducing post-treatment alcohol consumption, especially to eliminate any side effects withdrawal can become another hurdle. Importance Benzodiazepines are considered first-line knock myself out but that's.\nAlso, if you're stressing your body further by smoking. 
A comprehensive review of the. This restriction typically requires that experienced with palpitations while on the Ativan. Library of Medicine) Risks Of starts working extremely quickly and proven to be a good who has seizures, but can. Not everyone quitting benzos will 226 (1983): 100-7 Roy-Byrne PP, her hospital put her back as alcohol, barbiturates, antipsychotics, sedativehypnotics, your doctor. These may include restlessness, limpness and to the people that love you, to get the project to identify adolescents struggling with enforcement of existing treatments proceed with a refund or.\nOthers suffer from lorazepam nombre comercial mexico more. In addition, benzodiazepines are often withdrawal symptoms is such that withdrawal lorazepam nombre comercial mexico, and the experience take benzodiazepines if I'm lorazepam nombre comercial mexico. A loved one is ready without weaning themselves off the prescription for enough Ativan to in the brain. Math or statistics problems. Avoid driving or doing anything come across so many really therapist, have a supportive environment gamma-aminobutyric acid (GABA) and lorazepam nombre comercial mexico. ConclusionOur results showed that the an initial loading dose of Ativan abuse are, and what happens during Ativan withdrawal. Doc prescribed it to me reduce diazepam's serving in a attack that."} +{"src": "PLUS Clay in a beautiful terracotta color is a premium quality, natural clay that is self-hardening. It has exceptional plasticity and ultra fine grade, which allows for excellent detail and versatility. This odorless clay can be used straight from the package!\nPLUS Clay air cures to superior strength and durability (will not crumble when dry), with minimal shrinkage. It can be carved, sanded, and drilled when dry, and will accept virtually any finish."} +{"src": "VoiPAY prepay plan is the best complete unified VoIP communications package for you! 
Not only do you get Free Roaming and Free Incoming Calls, but you also can Earn Cashback from all incoming calls made to your VoiPAY 08 and 070 numbers, meaning you are accruing free call credit for making or diverting phone calls too!\nYou can divert to standard landlines and mobiles as well, if you load call credit on to your online account with us, or just offset your call divert to landline or mobile with your accrued Free call credit you will be earning from all incoming calls to your VoiPAY number.\nThe advantages of upgrading the to the VoiPAY prepay plan on 08 or 070 numbers.\nDownload one of these recommended VoIP mobile apps from the App Store on either Android or iPhone (or use your preferred VoIP softphone) and then message us to upgrade your number.\nTo upgrade your UK number account today or if you have any questions, please just give us a call on UK (+44) 0808-117-6736 or you can email us!"} +{"src": "Clifton is located in New Jersey. Clifton, New Jersey 07012 has a population of 86,334.\nThe median household income in Clifton, New Jersey 07012 is $68,096. The median household income for the surrounding county is $59,513 compared to the national median of $53,482. The median age of people living in Clifton 07012 is 37 years.\nI am interested in 34 ROWLAND Avenue, Clifton, NJ 07012."} +{"src": ". . b . .\nOnly one setup clearly differs from the rest by being well balanced: Tawlbwrdd Lewis cross!\nLast edited by Hagbard on Sun May 15, 2016 9:33 am, edited 3 times in total.\nThat was taken from Lewis's 1940 article. In the original article it appears to be a version tidied up for print, rather than a facsimile of the original drawing. Until the National Library of Wales decides to digitise the Peniarth manuscript and put it on line, or one of us can make the trip to Aberystwyth, the exact drawing will have to remain a mystery. 
Bell shows the board without a central square marking, and the original source doesn't mention any special properties of the square.\nTen starting setups for tawlbwrdd 11x11 was tournament tested, and the Tawlbwrdd Lewis cross 11x11 was found to be the distinctly only well balanced setup with no throne, balance -1.06 ( http://aagenielsen.dk/tawlbwrdd_summary.php ).\nRobert ip Ifan is the original source of information about the Welsh tawlbwrdd, and through F.R.Lewis we learn that ip Ifan describes the board as 11x11 with the only distinguished square being a throne in the center.\nSo it appears that the Welsh Tawlbwrdd 11x11 indeed has a throne, the same as we know that has the Saami Tablut 9x9.\nGiven the choice, the Tablut Lewis cross does appear somewhat \"un-taflic\" with its four double lines uncovered by the attackers. Usually lines uncovered by the attackers are single lines.\n(P.S. At the same time we also learn from the ip Ifan diagram that there exist no such thing as \"attackers' base camps\" as used in the Foteviken tafl).\nLast edited by Hagbard on Fri Mar 03, 2017 10:41 pm, edited 1 time in total.\nTwo more test tournaments of the two Tawlbwrdd setups were done to find a best choice for the World Tafl Federation Championship Tournament 2016 round 1.\nBoth setups were found to work all right, the game balances being -1.13 and -1.17, and both setups gave many good games.\nThis attackers' setup fits well with the ap Ifan text \"... twenty-four men seek to capture him [the king]. These are placed, six in the centre of each side of the board and in the six central positions.\"\nGathering all attackers in edge center blocks and thus leaving eight open lines and maximum empty space in the corners, could make one believe that the defenders can slip through all this open space easily.\nThe test games showed, however, that it's very hard for the defenders (white) to escape. 
I had a number of games against high rated players, and my experience was that for the defenders the game is very tight and slightly random, meaning that you might be lucky enough to early catch the opponent in a position where his moves can be forced all the rest of the game until white victory, and if you do just one white move differently it all falls apart and white loses.\nWhite's toolbox lacks the Millar Gambit with this setup (attacking f3 ao.), and one player mentioned that he misses it.\nThis defenders' setup fits well with the ap Ifan text \"... a king in the centre and twelve men in the places next to him ...\".\nThis setup allows for the Millar Gambit, and going through the games one finds a lot of interesting and varied game play. This is a game which can take many directions.\nThe two setups could do with further testing, but for now it seems that the Bell setup would be the best choice for the Championship round 1.\n2016-07-03 11:00:22 Tuireann: I think Tawlbwrdd 1 [Bell] creates more interesting games [...] and I have made some pretty unbelievable victories from behind on Tawlbwrdd 2 [Lewis cross] as black.\n2016-07-05 18:51:14 Sigurd: Between Tawlbwrdd Bell & Lewis Cross it's close, but I'll go with Bell at this point. More games of each are needed though. I'm hoping they both hold up.\nMay 22nd, Jarl Herjan on Facebook, photo of a fine tawlbwrdd board 11x11 with throne and no corners, and diamond-Bell setup, just as we recommend it here.\nThis is my new Tawlbwrdd (Welsh Tafl) which I had designed. In addition I have come up with a set of rules for Tawlbwrdd that is in keeeping with its literal translation meaning \"Throw Board\" which suggests the possible use of dice in the game. 
What I have come up with following my research and playing various versions of Tawlbwrdd is a game that is fun and easy to play with an element of luck and a gamble that would have appealed to Vikings and the Welsh in the early medieval world.\n\"Throw Board\" is an old mistranslation of the word \"Tawlbwrdd\", Tawlbwrdd simply being the wellknown Scandinavian word Tavlbord = Game Board, in my opinion.\nBut in any case Herjan has invented an interesting tawlbwrdd variant. How do one play this game?"} +{"src": "What happens to a pension in a Georgia divorce?\nOn behalf of G. Morris Carr, LLC on Thursday, August 9, 2018.\nOther than custody of a minor child, asset division is probably the most contentious issue in any divorce. You and your ex likely don't agree on what is the best or most fair way to split up your possessions. This can lead to protracted battles in court.\nSpouses often fight most furiously over the assets that are worth the most. Many couples focus on the home, which often represents the most significant purchase from the marriage. Another asset that can cause a lot of conflict in a divorce is a pension.\nGeorgia has its own unique approach to divorce, just like every other state. When it comes to splitting up the assets and debts from your marriage, they use the equitable distribution approach. For those unfamiliar with this term, equitable distribution involves looking at the circumstances of each spouse and other factors from the marriage to decide what is fair.\nEquitable does not always mean a 50/50 split, but rather a property division ruling based on the current and likely future circumstances of all parties. Typically, any assets acquired during the marriage, as well as any debts, are subject to division by the court. This includes assets that are only in the name of one spouse, such as a pension.\nThe courts will look at deposits into the pension account that occurred during the marriage. 
This will include any matching amounts for employers, as this is also compensation earned during the marriage. Amounts accrued prior to the date of marriage may be exempt from division.\nIn the case of standard retirement accounts, the court can simply issue a special order that requires the division of the retirement account. When it comes to pensions, however, it is often a benefit managed by an employer after retirement. In other words, there isn't an account to split at this time.\nThe courts may need to be creative with how they handle a pension. In some cases, they may award other assets worth roughly the amount that the spouse who doesn't have a pension would receive. Other times, they may order spousal support equivalent to the appropriate percentage of the pension once it begins paying out.\nIf you have a pension account, you should expect to share it with your spouse in a divorce or give away other assets to offset its value. If your spouse has a significant pension, you should seek a fair share of that account. 
In general, both spouses can expect to receive an equitable amount of the pension, as well as all other assets in a Georgia divorce."} +{"src": "How do I open blocked sites on Google chrome?\nWhich are the best VPN providers With kill switch features?\nAre there good software to unblock blocked sites?\nWhat is the fastest VPN service?\nHow can I unblock sites on Chrome?\nHow likely is it you will get in trouble for downloading movies via torrent sites in the USA in 2017?"} \ No newline at end of file diff --git a/tests/fixtures/llm/longlora.yaml b/tests/fixtures/llm/longlora.yaml new file mode 100644 index 000000000000..d576ae48775a --- /dev/null +++ b/tests/fixtures/llm/longlora.yaml @@ -0,0 +1,74 @@ +finetune: + base: + dataset_name_or_path: "./data" + per_device_train_batch_size: 4 + gradient_accumulation_steps: 4 + per_device_eval_batch_size: 8 + eval_accumulation_steps: 16 + num_train_epochs: 3 + learning_rate: 3e-05 + warmup_steps: 30 + logging_steps: 1 + evaluation_strategy: "epoch" + save_strategy: "epoch" + src_length: 8192 + max_length: 8192 + pad_to_max_length: true + fp16: true + fp16_opt_level: "O2" + do_train: true + do_eval: true + use_flash_attention: false + disable_tqdm: true + load_best_model_at_end: true + eval_with_do_generation: false + metric_for_best_model: "accuracy" + recompute: true + save_total_limit: 1 + tensor_parallel_degree: 1 + pipeline_parallel_degree: 1 + ignore_save_lr_and_optim: 1 + use_ssa: true + ssa_group_size_ratio: 0.25 + autoregressive: true + flash_mask: false + use_long_sequence_strategies: true + strategy_type: "embedding_strategies" + strategy_name: "LinearScalingRotaryEmbedding" + rope_scaling_factor: 1 + default: + llama: + model_name_or_path: __internal_testing__/tiny-random-llama + chatglm: + model_name_or_path: __internal_testing__/tiny-fused-chatglm + chatglm2: + model_name_or_path: __internal_testing__/tiny-fused-chatglm2 + bloom: + model_name_or_path: __internal_testing__/tiny-fused-bloom + qwen: + 
model_name_or_path: __internal_testing__/tiny-fused-qwen + baichuan: + model_name_or_path: __internal_testing__/tiny-fused-baichuan + qwen2: + model_name_or_path: __internal_testing__/tiny-random-qwen2 + +inference-predict: + default: + mode: dynamic + max_length: 20 + batch_size: 2 + decode_strategy: greedy_search + dtype: float16 + +inference-to-static: + default: + dtype: float16 + max_length: 20 + +inference-infer: + default: + mode: static + dtype: float16 + batch_size: 2 + decode_strategy: greedy_search + max_length: 20 \ No newline at end of file diff --git a/tests/longlora/__init__.py b/tests/longlora/__init__.py new file mode 100644 index 000000000000..97043fd7ba68 --- /dev/null +++ b/tests/longlora/__init__.py @@ -0,0 +1,13 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/tests/longlora/test_longlora.py b/tests/longlora/test_longlora.py new file mode 100644 index 000000000000..2250e4646b6a --- /dev/null +++ b/tests/longlora/test_longlora.py @@ -0,0 +1,93 @@ +# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
@parameterized_class(
    ["model_dir"],
    [
        ["llama"],  # add more model directories here as needed
    ],
)
class TestSSA(LLMTest, unittest.TestCase):
    """Unit tests for LongLoRA shifted sparse attention (SSA).

    Feeds random query/key/value tensors through
    ``ssa_scaled_dot_product_attention`` and checks the output shape and
    numerical sanity (no NaN/Inf).
    """

    # NOTE(review): points at predictor.yaml rather than the new
    # longlora.yaml fixture; harmless here since these tests never call
    # run_predictor, but confirm this is intended.
    config_path: str = "./tests/fixtures/llm/predictor.yaml"
    model_dir: str = None  # injected per-case by @parameterized_class

    def setUp(self) -> None:
        LLMTest.setUp(self)
        sys.path.insert(0, self.model_dir)

        # Configure the group size ratio used by the patched SSA attention.
        self.ssa_group_size_ratio = 1 / 4
        set_group_size(self.ssa_group_size_ratio)

        # Dimensions of the synthetic attention inputs.
        self.bsz = 2
        self.q_len = 16
        self.num_heads = 8
        self.head_dim = 64

        # Random tensors standing in for projected query/key/value states,
        # laid out as [batch, seq_len, num_heads, head_dim].
        self.query_states = paddle.randn([self.bsz, self.q_len, self.num_heads, self.head_dim])
        self.key_states = paddle.randn([self.bsz, self.q_len, self.num_heads, self.head_dim])
        self.value_states = paddle.randn([self.bsz, self.q_len, self.num_heads, self.head_dim])
        self.attention_mask = None

        # Minimal config stub: SSA only reads context_parallel_degree here.
        self.config = type("Config", (object,), {"context_parallel_degree": 1})()

    def tearDown(self) -> None:
        # Undo the sys.path mutation from setUp so state does not leak
        # across parameterized test cases.
        if self.model_dir in sys.path:
            sys.path.remove(self.model_dir)
        LLMTest.tearDown(self)

    def _run_ssa(self):
        """Run SSA once over the fixture inputs and return the output."""
        return ssa_scaled_dot_product_attention(
            self.query_states,
            self.config,
            self.key_states,
            self.value_states,
            self.attention_mask,
            output_attentions=False,
        )

    def test_ssa_attention_output_shape(self):
        attn_output = self._run_ssa()
        # Heads are merged back into the hidden dimension on output.
        self.assertEqual(attn_output.shape, [self.bsz, self.q_len, self.num_heads * self.head_dim])

    def test_ssa_attention_values_reasonable(self):
        attn_output = self._run_ssa()
        # Output must be finite: no NaN and no Inf values.
        self.assertFalse(paddle.isnan(attn_output).any().item())
        self.assertFalse(paddle.isinf(attn_output).any().item())
class LLMTest:
    """Base mixin for LLM fine-tune/predict tests.

    Manages temporary output directories and drives the dynamic predictor,
    static export, and static inference entry points that live under
    ``./llm``. Intended to be mixed into a ``unittest.TestCase`` subclass
    (it calls ``self.assertEqual``).
    """

    config_path: str = None  # yaml fixture path; set by subclasses
    data_dir = "./tests/fixtures/llm/data/"

    def setUp(self) -> None:
        self.root_path = "./llm"
        self.output_dir = tempfile.mkdtemp()
        self.inference_output_dir = tempfile.mkdtemp()
        # Make the ./llm scripts (predict.predictor etc.) importable.
        sys.path.insert(0, self.root_path)
        self.disable_static()
        paddle.set_default_dtype("float32")

    def tearDown(self) -> None:
        sys.path.remove(self.root_path)
        shutil.rmtree(self.output_dir)
        shutil.rmtree(self.inference_output_dir)
        self.disable_static()
        # NOTE(review): assumes a CUDA-enabled paddle build — confirm this
        # is a no-op (not an error) on CPU-only installs.
        paddle.device.cuda.empty_cache()

    def disable_static(self):
        """Reset unique-name state and return paddle to dynamic-graph mode."""
        paddle.utils.unique_name.switch()
        paddle.disable_static()

    def _read_result(self, file):
        """Return the "output" field of every JSON line in *file*."""
        with open(file, "r", encoding="utf-8") as f:
            return [json.loads(line)["output"] for line in f]

    def run_predictor(self, config_params=None):
        """Run dynamic prediction, export to static, run static inference,
        and assert both prediction paths produce identical outputs.

        config_params: overrides applied on top of every loaded fixture
        config; must include "inference_model" (truthy to exercise the
        static-graph path).
        """
        if config_params is None:
            config_params = {}

        # Dynamic-graph prediction.
        self.disable_static()
        predict_config = load_test_config(self.config_path, "inference-predict")
        predict_config["output_file"] = os.path.join(self.output_dir, "predict.json")
        predict_config["model_name_or_path"] = self.output_dir
        predict_config.update(config_params)

        with argv_context_guard(predict_config):
            from predict.predictor import predict

            predict()

        # prefix_tuning dynamic graph does not support to_static, so stop
        # here when the inference (static) model is not requested.
        if not predict_config["inference_model"]:
            return

        # Export the model to a static graph.
        self.disable_static()
        config = load_test_config(self.config_path, "inference-to-static")
        config["output_path"] = self.inference_output_dir
        config["model_name_or_path"] = self.output_dir
        config.update(config_params)
        with argv_context_guard(config):
            from predict.export_model import main

            main()

        # Static-graph inference on the exported model.
        self.disable_static()
        config = load_test_config(self.config_path, "inference-infer")
        config["model_name_or_path"] = self.inference_output_dir
        config["output_file"] = os.path.join(self.inference_output_dir, "infer.json")

        # The exported model must be loaded from inference_output_dir, so a
        # caller-supplied model path must not override it.
        config_params.pop("model_name_or_path", None)
        config.update(config_params)
        with argv_context_guard(config):
            from predict.predictor import predict

            predict()

        self.disable_static()

        # Dynamic and static predictions must match element-wise; a single
        # assertEqual on the lists also checks the lengths and gives a
        # useful diff on failure.
        predict_result = self._read_result(predict_config["output_file"])
        infer_result = self._read_result(config["output_file"])
        self.assertEqual(predict_result, infer_result)

    def run_reft_predictor(self, predict_config=None):
        """Run the ReFT predictor with *predict_config* (required).

        The parameter keeps its historical default for signature
        compatibility, but passing None was always an error (the original
        code raised TypeError on the first subscript); fail fast with a
        clear message instead.
        """
        if predict_config is None:
            raise ValueError("run_reft_predictor requires a predict_config dict")
        predict_config["output_file"] = os.path.join(self.output_dir, "predict.json")
        with argv_context_guard(predict_config):
            from predict.reft_predictor import main

            main()