From 8cace7780867dd78760f40c46fd5b6ddd47dea0a Mon Sep 17 00:00:00 2001
From: hiyouga
Date: Tue, 12 Dec 2023 11:44:30 +0800
Subject: [PATCH] update readme

---
 README.md                               |  6 +++---
 README_zh.md                            |  6 +++---
 src/llmtuner/hparams/finetuning_args.py |  4 ----
 src/llmtuner/model/utils.py             | 11 -----------
 4 files changed, 6 insertions(+), 21 deletions(-)

diff --git a/README.md b/README.md
index 665c1426..a298d9f5 100644
--- a/README.md
+++ b/README.md
@@ -55,14 +55,14 @@ Compared to ChatGLM's [P-Tuning](https://github.com/THUDM/ChatGLM2-6B/tree/main/
 
 ## Changelog
 
-[23/12/12] We supported fine-tuning the latest MoE model **[Mixtral 8x7B](https://huggingface.co/mistralai/Mixtral-8x7B-v0.1)** in our framework.
+[23/12/12] We supported fine-tuning the latest MoE model **[Mixtral 8x7B](https://huggingface.co/mistralai/Mixtral-8x7B-v0.1)** in our framework. See the hardware requirements [here](#hardware-requirement).
 
 [23/12/01] We supported downloading pre-trained models from the **[ModelScope Hub](https://modelscope.cn/models)** for Chinese mainland users. See [this tutorial](#use-modelscope-models-optional) for usage.
 
-[23/10/21] We supported **[NEFTune](https://arxiv.org/abs/2310.05914)** trick for fine-tuning. Try `--neftune_noise_alpha` argument to activate NEFTune, e.g., `--neftune_noise_alpha 5`.
-
 <details><summary>Full Changelog</summary>
 
+[23/10/21] We supported the **[NEFTune](https://arxiv.org/abs/2310.05914)** trick for fine-tuning. Try the `--neftune_noise_alpha` argument to activate NEFTune, e.g., `--neftune_noise_alpha 5`.
+
 [23/09/27] We supported **$S^2$-Attn** proposed by [LongLoRA](https://github.com/dvlab-research/LongLoRA) for the LLaMA models. Try `--shift_attn` argument to enable shift short attention.
 
 [23/09/23] We integrated MMLU, C-Eval and CMMLU benchmarks in this repo. See [this example](#evaluation) to evaluate your models.
diff --git a/README_zh.md b/README_zh.md
index 5151ab99..bd221800 100644
--- a/README_zh.md
+++ b/README_zh.md
@@ -55,14 +55,14 @@ https://github.com/hiyouga/LLaMA-Factory/assets/16256802/6ba60acc-e2e2-4bec-b846
 
 ## 更新日志
 
-[23/12/12] 我们支持了微调最新的混合专家模型 **[Mixtral 8x7B](https://huggingface.co/mistralai/Mixtral-8x7B-v0.1)**。
+[23/12/12] 我们支持了微调最新的混合专家模型 **[Mixtral 8x7B](https://huggingface.co/mistralai/Mixtral-8x7B-v0.1)**。硬件需求请查阅[此处](#硬件依赖)。
 
 [23/12/01] 我们支持了从 **[魔搭社区](https://modelscope.cn/models)** 下载预训练模型。详细用法请参照 [此教程](#使用魔搭社区可跳过)。
 
-[23/10/21] 我们支持了 **[NEFTune](https://arxiv.org/abs/2310.05914)** 训练技巧。请使用 `--neftune_noise_alpha` 参数启用 NEFTune,例如 `--neftune_noise_alpha 5`。
-
 <details><summary>展开日志</summary>
 
+[23/10/21] 我们支持了 **[NEFTune](https://arxiv.org/abs/2310.05914)** 训练技巧。请使用 `--neftune_noise_alpha` 参数启用 NEFTune,例如 `--neftune_noise_alpha 5`。
+
 [23/09/27] 我们针对 LLaMA 模型支持了 [LongLoRA](https://github.com/dvlab-research/LongLoRA) 提出的 **$S^2$-Attn**。请使用 `--shift_attn` 参数以启用该功能。
 
 [23/09/23] 我们在项目中集成了 MMLU、C-Eval 和 CMMLU 评估集。使用方法请参阅[此示例](#模型评估)。
diff --git a/src/llmtuner/hparams/finetuning_args.py b/src/llmtuner/hparams/finetuning_args.py
index 06e5b2c1..ae3a6f79 100644
--- a/src/llmtuner/hparams/finetuning_args.py
+++ b/src/llmtuner/hparams/finetuning_args.py
@@ -141,10 +141,6 @@ class FinetuningArguments(FreezeArguments, LoraArguments, RLHFArguments):
         default=False,
         metadata={"help": "Whether to upcast the layernorm weights in fp32."}
     )
-    neft_alpha: Optional[float] = field(
-        default=0,
-        metadata={"help": "The alpha parameter to control the noise magnitude in NEFTune."}
-    )
     export_dir: Optional[str] = field(
         default=None,
         metadata={"help": "Path to the directory to save the exported model."}
diff --git a/src/llmtuner/model/utils.py b/src/llmtuner/model/utils.py
index d897c849..a8853b1d 100644
--- a/src/llmtuner/model/utils.py
+++ b/src/llmtuner/model/utils.py
@@ -148,17 +148,6 @@ def prepare_model_for_training(
                 param.data = param.data.to(torch.float32)
         logger.info("Upcasting weights in layernorm in float32.")
 
-    if finetuning_args.neft_alpha > 1e-6:
-        def neftune_forward_hook(module: torch.nn.Module, args: Tuple[torch.Tensor], output: torch.Tensor):
-            if module.training:
-                dims = torch.tensor(output.size(1) * output.size(2))
-                mag_norm = finetuning_args.neft_alpha / torch.sqrt(dims)
-                output = output + torch.zeros_like(output).uniform_(-mag_norm, mag_norm)
-            return output
-
-        model.get_input_embeddings().register_forward_hook(neftune_forward_hook)
-        logger.info("Using noisy embedding with alpha={:.2f}".format(finetuning_args.neft_alpha))
-
     if use_gradient_checkpointing and getattr(model, "supports_gradient_checkpointing", False):
         if hasattr(model, "enable_input_require_grads"):
             model.enable_input_require_grads()
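
For reference, the deleted `neftune_forward_hook` was an in-repo implementation of the NEFTune trick: during training it adds uniform noise with magnitude `alpha / sqrt(seq_len * hidden_dim)` to the output of the input-embedding layer. The `--neftune_noise_alpha` flag kept in the README maps to `TrainingArguments.neftune_noise_alpha`, which newer 🤗 Transformers releases support natively, presumably making the custom hook redundant. Below is a minimal standalone sketch of the removed behavior, assuming a Transformers-style model that exposes `get_input_embeddings()`; the helper name `attach_neftune` is illustrative, not part of LLaMA-Factory's API:

```python
import math
from typing import Tuple

import torch


def attach_neftune(model: torch.nn.Module, alpha: float = 5.0) -> "torch.utils.hooks.RemovableHandle":
    """Inject NEFTune noise into the input embeddings whenever the model is in training mode."""

    def neftune_hook(module: torch.nn.Module, args: Tuple[torch.Tensor, ...], output: torch.Tensor) -> torch.Tensor:
        if module.training:  # leave inference untouched
            # output shape: (batch_size, seq_len, hidden_dim); NEFTune scales noise by alpha / sqrt(L * d)
            mag_norm = alpha / math.sqrt(output.size(1) * output.size(2))
            output = output + torch.zeros_like(output).uniform_(-mag_norm, mag_norm)
        return output

    # assumes a Hugging Face-style model that implements get_input_embeddings()
    return model.get_input_embeddings().register_forward_hook(neftune_hook)
```

Calling `attach_neftune(model, alpha=5.0)` before training approximates `--neftune_noise_alpha 5`; keeping the returned handle lets you call `handle.remove()` to strip the noise again before evaluation or export.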