From d11a5454633be9f0600cbd1ab7a26c9c8fa5ed80 Mon Sep 17 00:00:00 2001
From: hiyouga
Date: Thu, 28 Sep 2023 14:39:16 +0800
Subject: [PATCH] fix #1068 #1074

---
 README.md                        | 4 ++--
 README_zh.md                     | 4 ++--
 evaluation/cmmlu/cmmlu.py        | 4 ++++
 src/llmtuner/extras/constants.py | 2 ++
 4 files changed, 10 insertions(+), 4 deletions(-)

diff --git a/README.md b/README.md
index e4a96bed..ef434ac1 100644
--- a/README.md
+++ b/README.md
@@ -14,11 +14,11 @@
 
 ## Changelog
 
-[23/09/27] We supported **$S^2$-Attn** proposed by [LongLoRA](https://github.com/dvlab-research/LongLoRA). Try `--shift_attn` argument to enable shift short attention.
+[23/09/27] We supported **$S^2$-Attn** proposed by [LongLoRA](https://github.com/dvlab-research/LongLoRA) for the LLaMA models. Try `--shift_attn` argument to enable shift short attention.
 
 [23/09/23] We integrated MMLU, C-Eval and CMMLU benchmarks in this repo. See [this example](#evaluation) to evaluate your models.
 
-[23/09/10] We supported using **[FlashAttention](https://github.com/Dao-AILab/flash-attention)** for the LLaMA models. Try `--flash_attn` argument to enable FlashAttention-2 if you are using RTX4090, A100 or H100 GPUs.
+[23/09/10] We supported using **[FlashAttention-2](https://github.com/Dao-AILab/flash-attention)** for the LLaMA models. Try `--flash_attn` argument to enable FlashAttention-2 if you are using RTX4090, A100 or H100 GPUs.
 
 [23/08/18] We supported **resuming training**, upgrade `transformers` to `4.31.0` to enjoy this feature.
 
diff --git a/README_zh.md b/README_zh.md
index 04802e90..35f72c47 100644
--- a/README_zh.md
+++ b/README_zh.md
@@ -14,11 +14,11 @@
 
 ## 更新日志
 
-[23/09/27] 我们支持了 [LongLoRA](https://github.com/dvlab-research/LongLoRA) 提出的 **$S^2$-Attn**。请使用 `--shift_attn` 参数以启用该功能。
+[23/09/27] 我们针对 LLaMA 模型支持了 [LongLoRA](https://github.com/dvlab-research/LongLoRA) 提出的 **$S^2$-Attn**。请使用 `--shift_attn` 参数以启用该功能。
 
 [23/09/23] 我们在项目中集成了 MMLU、C-Eval 和 CMMLU 评估集。使用方法请参阅[此示例](#模型评估)。
 
-[23/09/10] 我们支持了 LLaMA 模型的 **[FlashAttention](https://github.com/Dao-AILab/flash-attention)**。如果您使用的是 RTX4090、A100 或 H100 GPU,请使用 `--flash_attn` 参数以启用 FlashAttention-2(实验性功能)。
+[23/09/10] 我们针对 LLaMA 模型支持了 **[FlashAttention-2](https://github.com/Dao-AILab/flash-attention)**。如果您使用的是 RTX4090、A100 或 H100 GPU,请使用 `--flash_attn` 参数以启用 FlashAttention-2(实验性功能)。
 
 [23/08/18] 我们支持了**训练状态恢复**,请将 `transformers` 升级至 `4.31.0` 以启用此功能。
 
diff --git a/evaluation/cmmlu/cmmlu.py b/evaluation/cmmlu/cmmlu.py
index e89164fd..62096203 100644
--- a/evaluation/cmmlu/cmmlu.py
+++ b/evaluation/cmmlu/cmmlu.py
@@ -160,4 +160,8 @@ class CMMLU(datasets.GeneratorBasedBuilder):
     def _generate_examples(self, filepath):
         df = pd.read_csv(filepath, header=0, index_col=0, encoding="utf-8")
         for i, instance in enumerate(df.to_dict(orient="records")):
+            question = instance.pop("Question", "")
+            answer = instance.pop("Answer", "")
+            instance["question"] = question
+            instance["answer"] = answer
             yield i, instance
diff --git a/src/llmtuner/extras/constants.py b/src/llmtuner/extras/constants.py
index ef881c44..761dba71 100644
--- a/src/llmtuner/extras/constants.py
+++ b/src/llmtuner/extras/constants.py
@@ -51,7 +51,9 @@ SUPPORTED_MODELS = {
     "InternLM-7B-Chat": "internlm/internlm-chat-7b",
     "InternLM-20B-Chat": "internlm/internlm-chat-20b",
     "Qwen-7B": "Qwen/Qwen-7B",
+    "Qwen-14B": "Qwen/Qwen-14B",
     "Qwen-7B-Chat": "Qwen/Qwen-7B-Chat",
+    "Qwen-14B-Chat": "Qwen/Qwen-14B-Chat",
     "XVERSE-13B": "xverse/XVERSE-13B",
     "XVERSE-13B-Chat": "xverse/XVERSE-13B-Chat",
     "ChatGLM2-6B-Chat": "THUDM/chatglm2-6b",
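
For reference, a minimal standalone sketch of what the evaluation/cmmlu/cmmlu.py hunk does: each CSV row is converted to a dict, and the capitalized "Question"/"Answer" columns are re-keyed to the lowercase "question"/"answer" names expected by the dataset features. The inline DataFrame and the generate_examples wrapper below are illustrative only and not part of the patch; the real loader reads the CMMLU CSV files with pd.read_csv as shown in the diff.

import pandas as pd

# Hypothetical sample data standing in for one CMMLU CSV file; the real loader
# uses pd.read_csv(filepath, header=0, index_col=0, encoding="utf-8").
df = pd.DataFrame(
    {
        "Question": ["1 + 1 = ?"],
        "A": ["1"],
        "B": ["2"],
        "C": ["3"],
        "D": ["4"],
        "Answer": ["B"],
    }
)

def generate_examples(frame):
    for i, instance in enumerate(frame.to_dict(orient="records")):
        # Remove the capitalized columns (falling back to "" if a column is missing)...
        question = instance.pop("Question", "")
        answer = instance.pop("Answer", "")
        # ...and re-insert the values under the lowercase keys used by the dataset features.
        instance["question"] = question
        instance["answer"] = answer
        yield i, instance

for idx, example in generate_examples(df):
    print(idx, example)
    # 0 {'A': '1', 'B': '2', 'C': '3', 'D': '4', 'question': '1 + 1 = ?', 'answer': 'B'}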