diff --git a/scripts/cal_flops.py b/scripts/cal_flops.py
index 627b5534..32526d89 100644
--- a/scripts/cal_flops.py
+++ b/scripts/cal_flops.py
@@ -1,7 +1,7 @@
 # coding=utf-8
 # Copyright 2024 Microsoft Corporation and the LlamaFactory team.
 #
-# This code is inspired by Microsoft's DeepSpeed library.
+# This code is inspired by the Microsoft's DeepSpeed library.
 # https://www.deepspeed.ai/tutorials/flops-profiler/
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
diff --git a/scripts/cal_lr.py b/scripts/cal_lr.py
index ff21d27c..ad6992cb 100644
--- a/scripts/cal_lr.py
+++ b/scripts/cal_lr.py
@@ -1,7 +1,7 @@
 # coding=utf-8
 # Copyright 2024 imoneoi and the LlamaFactory team.
 #
-# This code is inspired by imoneoi's OpenChat library.
+# This code is inspired by the imoneoi's OpenChat library.
 # https://github.com/imoneoi/openchat/blob/3.6.0/ochat/training_deepspeed/train.py
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
diff --git a/scripts/llama_pro.py b/scripts/llama_pro.py
index f315335a..395375ef 100644
--- a/scripts/llama_pro.py
+++ b/scripts/llama_pro.py
@@ -1,7 +1,7 @@
 # coding=utf-8
 # Copyright 2024 Tencent Inc. and the LlamaFactory team.
 #
-# This code is inspired by Tencent's LLaMA-Pro library.
+# This code is inspired by the Tencent's LLaMA-Pro library.
 # https://github.com/TencentARC/LLaMA-Pro/blob/main/scripts/block_expansion.py
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
diff --git a/src/llamafactory/data/processors/pretrain.py b/src/llamafactory/data/processors/pretrain.py
index fb4c840c..67d6009b 100644
--- a/src/llamafactory/data/processors/pretrain.py
+++ b/src/llamafactory/data/processors/pretrain.py
@@ -1,6 +1,6 @@
 # Copyright 2024 HuggingFace Inc. and the LlamaFactory team.
 #
-# This code is inspired by HuggingFace's transformers library.
+# This code is inspired by the HuggingFace's transformers library.
 # https://github.com/huggingface/transformers/blob/v4.40.0/examples/pytorch/language-modeling/run_clm.py
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
diff --git a/src/llamafactory/eval/evaluator.py b/src/llamafactory/eval/evaluator.py
index bbd7a44b..d3140793 100644
--- a/src/llamafactory/eval/evaluator.py
+++ b/src/llamafactory/eval/evaluator.py
@@ -1,6 +1,6 @@
 # Copyright 2024 the LlamaFactory team.
 #
-# This code is inspired by Dan's test library.
+# This code is inspired by the Dan's test library.
 # https://github.com/hendrycks/test/blob/master/evaluate_flan.py
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
diff --git a/src/llamafactory/extras/packages.py b/src/llamafactory/extras/packages.py
index 35f546ab..0a84a293 100644
--- a/src/llamafactory/extras/packages.py
+++ b/src/llamafactory/extras/packages.py
@@ -1,6 +1,6 @@
 # Copyright 2024 HuggingFace Inc. and the LlamaFactory team.
 #
-# This code is inspired by HuggingFace's transformers library.
+# This code is inspired by the HuggingFace's transformers library.
 # https://github.com/huggingface/transformers/blob/v4.40.0/src/transformers/utils/import_utils.py
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
diff --git a/src/llamafactory/hparams/data_args.py b/src/llamafactory/hparams/data_args.py
index 95284766..39290e21 100644
--- a/src/llamafactory/hparams/data_args.py
+++ b/src/llamafactory/hparams/data_args.py
@@ -1,6 +1,6 @@
 # Copyright 2024 HuggingFace Inc. and the LlamaFactory team.
 #
-# This code is inspired by HuggingFace's transformers library.
+# This code is inspired by the HuggingFace's transformers library.
 # https://github.com/huggingface/transformers/blob/v4.40.0/examples/pytorch/language-modeling/run_clm.py
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
diff --git a/src/llamafactory/model/model_utils/checkpointing.py b/src/llamafactory/model/model_utils/checkpointing.py
index e4e84b12..f5314125 100644
--- a/src/llamafactory/model/model_utils/checkpointing.py
+++ b/src/llamafactory/model/model_utils/checkpointing.py
@@ -1,6 +1,6 @@
 # Copyright 2024 HuggingFace Inc. and the LlamaFactory team.
 #
-# This code is inspired by HuggingFace's Transformers and PEFT library.
+# This code is inspired by the HuggingFace's Transformers and PEFT library.
 # https://github.com/huggingface/transformers/blob/v4.40.0/src/transformers/modeling_utils.py
 # https://github.com/huggingface/peft/blob/v0.10.0/src/peft/utils/other.py
 #
diff --git a/src/llamafactory/model/model_utils/longlora.py b/src/llamafactory/model/model_utils/longlora.py
index 7af43dcf..af30bd50 100644
--- a/src/llamafactory/model/model_utils/longlora.py
+++ b/src/llamafactory/model/model_utils/longlora.py
@@ -1,7 +1,9 @@
-# Copyright 2024 EleutherAI, HuggingFace Inc., and the LlamaFactory team.
+# Copyright 2024 EleutherAI, HuggingFace Inc., Yukang Chen, and the LlamaFactory team.
 #
-# This code is based on the EleutherAI's GPT-NeoX and HuggingFace's Transformers libraries.
+# This code is based on the EleutherAI's GPT-NeoX and the HuggingFace's Transformers libraries.
 # https://github.com/huggingface/transformers/blob/v4.40.0/src/transformers/models/llama/modeling_llama.py
+# This code is also inspired by the original LongLoRA implementation.
+# https://github.com/dvlab-research/LongLoRA/blob/main/llama_attn_replace.py
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
diff --git a/src/llamafactory/model/model_utils/quantization.py b/src/llamafactory/model/model_utils/quantization.py
index 9e6b9da4..0a0fca34 100644
--- a/src/llamafactory/model/model_utils/quantization.py
+++ b/src/llamafactory/model/model_utils/quantization.py
@@ -1,6 +1,6 @@
 # Copyright 2024 HuggingFace Inc. and the LlamaFactory team.
 #
-# This code is inspired by HuggingFace's Optimum library.
+# This code is inspired by the HuggingFace's Optimum library.
 # https://github.com/huggingface/optimum/blob/v1.20.0/optimum/gptq/data.py
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
diff --git a/src/llamafactory/model/model_utils/visual.py b/src/llamafactory/model/model_utils/visual.py
index 37237485..700bf470 100644
--- a/src/llamafactory/model/model_utils/visual.py
+++ b/src/llamafactory/model/model_utils/visual.py
@@ -1,6 +1,6 @@
 # Copyright 2024 HuggingFace Inc. and the LlamaFactory team.
 #
-# This code is inspired by HuggingFace's TRL library.
+# This code is inspired by the HuggingFace's Transformers library.
 # https://github.com/huggingface/transformers/blob/v4.40.0/src/transformers/models/llava/modeling_llava.py
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
diff --git a/src/llamafactory/train/dpo/workflow.py b/src/llamafactory/train/dpo/workflow.py
index 8c3c2eb1..431b5285 100644
--- a/src/llamafactory/train/dpo/workflow.py
+++ b/src/llamafactory/train/dpo/workflow.py
@@ -1,6 +1,6 @@
 # Copyright 2024 HuggingFace Inc. and the LlamaFactory team.
 #
-# This code is inspired by HuggingFace's TRL library.
+# This code is inspired by the HuggingFace's TRL library.
 # https://github.com/huggingface/trl/blob/v0.8.0/examples/scripts/dpo.py
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
diff --git a/src/llamafactory/train/kto/trainer.py b/src/llamafactory/train/kto/trainer.py
index 6e96fc0c..91d68975 100644
--- a/src/llamafactory/train/kto/trainer.py
+++ b/src/llamafactory/train/kto/trainer.py
@@ -1,6 +1,6 @@
 # Copyright 2024 HuggingFace Inc. and the LlamaFactory team.
 #
-# This code is inspired by HuggingFace's TRL library.
+# This code is inspired by the HuggingFace's TRL library.
 # https://github.com/huggingface/trl/blob/v0.8.0/trl/trainer/kto_trainer.py
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
@@ -114,8 +114,8 @@ class CustomKTOTrainer(KTOTrainer):
 
     def _save(self, output_dir: Optional[str] = None, state_dict: Optional[Dict[str, "torch.Tensor"]] = None) -> None:
         super()._save(output_dir, state_dict)
+        output_dir = output_dir if output_dir is not None else self.args.output_dir
         if self.processor is not None:
-            output_dir = output_dir if output_dir is not None else self.args.output_dir
             getattr(self.processor, "image_processor").save_pretrained(output_dir)
 
     def forward(
diff --git a/src/llamafactory/train/kto/workflow.py b/src/llamafactory/train/kto/workflow.py
index 8a7af6d4..8182a184 100644
--- a/src/llamafactory/train/kto/workflow.py
+++ b/src/llamafactory/train/kto/workflow.py
@@ -1,6 +1,6 @@
 # Copyright 2024 HuggingFace Inc. and the LlamaFactory team.
 #
-# This code is inspired by HuggingFace's TRL library.
+# This code is inspired by the HuggingFace's TRL library.
 # https://github.com/huggingface/trl/blob/v0.8.0/examples/scripts/kto.py
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
diff --git a/src/llamafactory/train/ppo/trainer.py b/src/llamafactory/train/ppo/trainer.py
index 61420f3b..df4a37be 100644
--- a/src/llamafactory/train/ppo/trainer.py
+++ b/src/llamafactory/train/ppo/trainer.py
@@ -1,6 +1,6 @@
 # Copyright 2024 HuggingFace Inc. and the LlamaFactory team.
 #
-# This code is inspired by HuggingFace's TRL library.
+# This code is inspired by the HuggingFace's TRL library.
 # https://github.com/huggingface/trl/blob/v0.8.0/trl/trainer/ppo_trainer.py
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
diff --git a/src/llamafactory/train/ppo/workflow.py b/src/llamafactory/train/ppo/workflow.py
index 891d539a..4f4d2820 100644
--- a/src/llamafactory/train/ppo/workflow.py
+++ b/src/llamafactory/train/ppo/workflow.py
@@ -1,6 +1,6 @@
 # Copyright 2024 HuggingFace Inc. and the LlamaFactory team.
 #
-# This code is inspired by HuggingFace's TRL library.
+# This code is inspired by the HuggingFace's TRL library.
 # https://github.com/huggingface/trl/blob/v0.8.0/examples/scripts/ppo.py
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
diff --git a/src/llamafactory/train/pt/workflow.py b/src/llamafactory/train/pt/workflow.py
index f1df314e..b84a0e7d 100644
--- a/src/llamafactory/train/pt/workflow.py
+++ b/src/llamafactory/train/pt/workflow.py
@@ -1,6 +1,6 @@
 # Copyright 2024 HuggingFace Inc. and the LlamaFactory team.
 #
-# This code is inspired by HuggingFace's transformers library.
+# This code is inspired by the HuggingFace's transformers library.
 # https://github.com/huggingface/transformers/blob/v4.40.0/examples/pytorch/language-modeling/run_clm.py
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
diff --git a/src/llamafactory/train/rm/trainer.py b/src/llamafactory/train/rm/trainer.py
index 14695d7d..7f91e5f5 100644
--- a/src/llamafactory/train/rm/trainer.py
+++ b/src/llamafactory/train/rm/trainer.py
@@ -1,6 +1,6 @@
 # Copyright 2024 the LlamaFactory team.
 #
-# This code is inspired by CarperAI's trlx library.
+# This code is inspired by the CarperAI's trlx library.
 # https://github.com/CarperAI/trlx/blob/v0.7.0/examples/summarize_rlhf/reward_model/reward_model.py
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
@@ -89,8 +89,8 @@ class PairwiseTrainer(Trainer):
 
     def _save(self, output_dir: Optional[str] = None, state_dict: Optional[Dict[str, "torch.Tensor"]] = None) -> None:
         super()._save(output_dir, state_dict)
+        output_dir = output_dir if output_dir is not None else self.args.output_dir
         if self.processor is not None:
-            output_dir = output_dir if output_dir is not None else self.args.output_dir
             getattr(self.processor, "image_processor").save_pretrained(output_dir)
 
     def compute_loss(
diff --git a/src/llamafactory/train/rm/workflow.py b/src/llamafactory/train/rm/workflow.py
index 75c0a2bf..6f24e964 100644
--- a/src/llamafactory/train/rm/workflow.py
+++ b/src/llamafactory/train/rm/workflow.py
@@ -1,6 +1,6 @@
 # Copyright 2024 the LlamaFactory team.
 #
-# This code is inspired by CarperAI's trlx library.
+# This code is inspired by the CarperAI's trlx library.
 # https://github.com/CarperAI/trlx/blob/v0.7.0/examples/summarize_rlhf/reward_model/train_reward_model_gptj.py
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
diff --git a/src/llamafactory/train/sft/metric.py b/src/llamafactory/train/sft/metric.py
index d2147c22..95bfcb69 100644
--- a/src/llamafactory/train/sft/metric.py
+++ b/src/llamafactory/train/sft/metric.py
@@ -1,6 +1,6 @@
 # Copyright 2024 HuggingFace Inc., THUDM, and the LlamaFactory team.
 #
-# This code is inspired by HuggingFace's transformers library and THUDM's ChatGLM implementation.
+# This code is inspired by the HuggingFace's transformers library and the THUDM's ChatGLM implementation.
 # https://github.com/huggingface/transformers/blob/v4.40.0/examples/pytorch/summarization/run_summarization.py
 # https://github.com/THUDM/ChatGLM-6B/blob/main/ptuning/main.py
 #
diff --git a/src/llamafactory/train/sft/workflow.py b/src/llamafactory/train/sft/workflow.py
index dfc71cfb..885bc7ac 100644
--- a/src/llamafactory/train/sft/workflow.py
+++ b/src/llamafactory/train/sft/workflow.py
@@ -1,6 +1,6 @@
 # Copyright 2024 HuggingFace Inc. and the LlamaFactory team.
 #
-# This code is inspired by HuggingFace's transformers library.
+# This code is inspired by the HuggingFace's transformers library.
 # https://github.com/huggingface/transformers/blob/v4.40.0/examples/pytorch/summarization/run_summarization.py
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
diff --git a/tests/model/test_base.py b/tests/model/test_base.py
index 2deedde2..954492ef 100644
--- a/tests/model/test_base.py
+++ b/tests/model/test_base.py
@@ -41,7 +41,7 @@ def compare_model(model_a: "torch.nn.Module", model_b: "torch.nn.Module"):
     state_dict_b = model_b.state_dict()
     assert set(state_dict_a.keys()) == set(state_dict_b.keys())
     for name in state_dict_a.keys():
-        assert torch.allclose(state_dict_a[name], state_dict_b[name]) is True
+        assert torch.allclose(state_dict_a[name], state_dict_b[name])
 
 
 @pytest.fixture
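For context on the two _save hunks above (kto/trainer.py and rm/trainer.py): the only behavioural point is that output_dir is now resolved against self.args.output_dir before the processor check rather than inside it, so the resolved path is available to anything else the override might save. Below is a minimal standalone sketch of how the patched override reads; the processor attribute is assumed to be attached by the trainer's own constructor elsewhere in LlamaFactory, and this is an illustration of the pattern, not the full trainer.

from typing import Dict, Optional

import torch
from transformers import Trainer


class PairwiseTrainer(Trainer):
    # Assumption: LlamaFactory's trainer sets "processor" in its __init__;
    # it is None when no multimodal processor is in use.
    processor = None

    def _save(self, output_dir: Optional[str] = None, state_dict: Optional[Dict[str, "torch.Tensor"]] = None) -> None:
        super()._save(output_dir, state_dict)
        # Resolve the target directory once, before any optional artifacts are written.
        output_dir = output_dir if output_dir is not None else self.args.output_dir
        if self.processor is not None:
            getattr(self.processor, "image_processor").save_pretrained(output_dir)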