From f15e37dfad640378836930d640241a630182b82e Mon Sep 17 00:00:00 2001 From: wql Date: Thu, 5 Sep 2024 15:49:32 +0800 Subject: [PATCH] fix: fix bf16 --- prepare_yaml_file.py | 2 +- results/lora_sft_template.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/prepare_yaml_file.py b/prepare_yaml_file.py index 7428f229..36357f6b 100644 --- a/prepare_yaml_file.py +++ b/prepare_yaml_file.py @@ -28,7 +28,7 @@ def main(): model_name_or_path = "../../models/llama-2-7b-ms" template = "llama2" elif model == "Qwen-7B": - model_name_or_path = "/root/models/Qwen-7B" + model_name_or_path = "../../models/Qwen-7B" template = "qwen" else: print("ERROR: model not supported.") diff --git a/results/lora_sft_template.yaml b/results/lora_sft_template.yaml index a3b42642..9a4411e4 100644 --- a/results/lora_sft_template.yaml +++ b/results/lora_sft_template.yaml @@ -29,7 +29,7 @@ learning_rate: 1.0e-4 num_train_epochs: 10.0 lr_scheduler_type: cosine warmup_ratio: 0.1 -bf16: true +fp16: true ddp_timeout: 180000000 max_steps: 500 include_num_input_tokens_seen: true