From 0048a2021e94d068f7c6054df0b9569ae4912eb1 Mon Sep 17 00:00:00 2001 From: hiyouga Date: Wed, 6 Mar 2024 17:25:08 +0800 Subject: [PATCH] tiny fix --- README.md | 3 +-- README_zh.md | 3 +-- src/llmtuner/extras/constants.py | 24 ++++++++++++------------ src/llmtuner/train/sft/metric.py | 2 +- 4 files changed, 15 insertions(+), 17 deletions(-) diff --git a/README.md b/README.md index 3a74966a..0e1bec64 100644 --- a/README.md +++ b/README.md @@ -475,8 +475,7 @@ CUDA_VISIBLE_DEVICES=0 python src/train_bash.py \ #### Use Huggingface Accelerate ```bash -accelerate config # configure the environment -accelerate launch src/train_bash.py # arguments (same as above) +accelerate launch --config_file config.yaml src/train_bash.py # arguments (same as above) ```
Example config for LoRA training diff --git a/README_zh.md b/README_zh.md index f5342726..bf1b8c9d 100644 --- a/README_zh.md +++ b/README_zh.md @@ -474,8 +474,7 @@ CUDA_VISIBLE_DEVICES=0 python src/train_bash.py \ #### 使用 Huggingface Accelerate ```bash -accelerate config # 首先配置分布式环境 -accelerate launch src/train_bash.py # 参数同上 +accelerate launch --config_file config.yaml src/train_bash.py # 参数同上 ```
LoRA 训练的 Accelerate 配置示例 diff --git a/src/llmtuner/extras/constants.py b/src/llmtuner/extras/constants.py index fab1e8e9..e85bdef4 100644 --- a/src/llmtuner/extras/constants.py +++ b/src/llmtuner/extras/constants.py @@ -671,48 +671,48 @@ register_model_group( DownloadSource.MODELSCOPE: "qwen/Qwen1.5-0.5B-Chat-GPTQ-Int8", }, "Qwen1.5-0.5B-int4-Chat": { - DownloadSource.DEFAULT: "Qwen/Qwen1.5-0.5B-Chat-GPTQ-Int4", - DownloadSource.MODELSCOPE: "qwen/Qwen1.5-0.5B-Chat-GPTQ-Int4", + DownloadSource.DEFAULT: "Qwen/Qwen1.5-0.5B-Chat-AWQ", + DownloadSource.MODELSCOPE: "qwen/Qwen1.5-0.5B-Chat-AWQ", }, "Qwen1.5-1.8B-int8-Chat": { DownloadSource.DEFAULT: "Qwen/Qwen1.5-1.8B-Chat-GPTQ-Int8", DownloadSource.MODELSCOPE: "qwen/Qwen1.5-1.8B-Chat-GPTQ-Int8", }, "Qwen1.5-1.8B-int4-Chat": { - DownloadSource.DEFAULT: "Qwen/Qwen1.5-1.8B-Chat-GPTQ-Int4", - DownloadSource.MODELSCOPE: "qwen/Qwen1.5-1.8B-Chat-GPTQ-Int4", + DownloadSource.DEFAULT: "Qwen/Qwen1.5-1.8B-Chat-AWQ", + DownloadSource.MODELSCOPE: "qwen/Qwen1.5-1.8B-Chat-AWQ", }, "Qwen1.5-4B-int8-Chat": { DownloadSource.DEFAULT: "Qwen/Qwen1.5-4B-Chat-GPTQ-Int8", DownloadSource.MODELSCOPE: "qwen/Qwen1.5-4B-Chat-GPTQ-Int8", }, "Qwen1.5-4B-int4-Chat": { - DownloadSource.DEFAULT: "Qwen/Qwen1.5-4B-Chat-GPTQ-Int4", - DownloadSource.MODELSCOPE: "qwen/Qwen1.5-4B-Chat-GPTQ-Int4", + DownloadSource.DEFAULT: "Qwen/Qwen1.5-4B-Chat-AWQ", + DownloadSource.MODELSCOPE: "qwen/Qwen1.5-4B-Chat-AWQ", }, "Qwen1.5-7B-int8-Chat": { DownloadSource.DEFAULT: "Qwen/Qwen1.5-7B-Chat-GPTQ-Int8", DownloadSource.MODELSCOPE: "qwen/Qwen1.5-7B-Chat-GPTQ-Int8", }, "Qwen1.5-7B-int4-Chat": { - DownloadSource.DEFAULT: "Qwen/Qwen1.5-7B-Chat-GPTQ-Int4", - DownloadSource.MODELSCOPE: "qwen/Qwen1.5-7B-Chat-GPTQ-Int4", + DownloadSource.DEFAULT: "Qwen/Qwen1.5-7B-Chat-AWQ", + DownloadSource.MODELSCOPE: "qwen/Qwen1.5-7B-Chat-AWQ", }, "Qwen1.5-14B-int8-Chat": { DownloadSource.DEFAULT: "Qwen/Qwen1.5-14B-Chat-GPTQ-Int8", DownloadSource.MODELSCOPE: "qwen/Qwen1.5-14B-Chat-GPTQ-Int8", }, "Qwen1.5-14B-int4-Chat": { - DownloadSource.DEFAULT: "Qwen/Qwen1.5-14B-Chat-GPTQ-Int4", - DownloadSource.MODELSCOPE: "qwen/Qwen1.5-14B-Chat-GPTQ-Int4", + DownloadSource.DEFAULT: "Qwen/Qwen1.5-14B-Chat-AWQ", + DownloadSource.MODELSCOPE: "qwen/Qwen1.5-14B-Chat-AWQ", }, "Qwen1.5-72B-int8-Chat": { DownloadSource.DEFAULT: "Qwen/Qwen1.5-72B-Chat-GPTQ-Int8", DownloadSource.MODELSCOPE: "qwen/Qwen1.5-72B-Chat-GPTQ-Int8", }, "Qwen1.5-72B-int4-Chat": { - DownloadSource.DEFAULT: "Qwen/Qwen1.5-72B-Chat-GPTQ-Int4", - DownloadSource.MODELSCOPE: "qwen/Qwen1.5-72B-Chat-GPTQ-Int4", + DownloadSource.DEFAULT: "Qwen/Qwen1.5-72B-Chat-AWQ", + DownloadSource.MODELSCOPE: "qwen/Qwen1.5-72B-Chat-AWQ", }, }, template="qwen", diff --git a/src/llmtuner/train/sft/metric.py b/src/llmtuner/train/sft/metric.py index f37eba97..d1af4c17 100644 --- a/src/llmtuner/train/sft/metric.py +++ b/src/llmtuner/train/sft/metric.py @@ -11,7 +11,7 @@ if TYPE_CHECKING: from transformers.tokenization_utils import PreTrainedTokenizer if is_jieba_available(): - import jieba + import jieba # type: ignore if is_nltk_available(): from nltk.translate.bleu_score import SmoothingFunction, sentence_bleu