diff --git a/examples/README.md b/examples/README.md index 6732faaf..4f34be52 100644 --- a/examples/README.md +++ b/examples/README.md @@ -3,7 +3,7 @@ We provide diverse examples about fine-tuning LLMs. ``` examples/ ├── lora_single_gpu/ -│ ├── pt.sh: Do pre-training +│ ├── pretrain.sh: Do pre-training │ ├── sft.sh: Do supervised fine-tuning │ ├── reward.sh: Do reward modeling │ ├── ppo.sh: Do PPO training diff --git a/examples/README_zh.md b/examples/README_zh.md index 50b5203f..a77209b3 100644 --- a/examples/README_zh.md +++ b/examples/README_zh.md @@ -3,7 +3,7 @@ ``` examples/ ├── lora_single_gpu/ -│ ├── pt.sh: 进行预训练 +│ ├── pretrain.sh: 进行预训练 │ ├── sft.sh: 进行指令监督微调 │ ├── reward.sh: 进行奖励模型训练 │ ├── ppo.sh: 进行 PPO 训练 diff --git a/examples/lora_single_gpu/dpo.sh b/examples/lora_single_gpu/dpo.sh index daa8ac85..56a2dfc3 100644 --- a/examples/lora_single_gpu/dpo.sh +++ b/examples/lora_single_gpu/dpo.sh @@ -6,7 +6,7 @@ CUDA_VISIBLE_DEVICES=0 python ../../src/train_bash.py \ --model_name_or_path meta-llama/Llama-2-7b-hf \ --adapter_name_or_path ../../saves/LLaMA2-7B/lora/sft \ --create_new_adapter \ - --dataset comparison_gpt4_en \ + --dataset orca_rlhf \ --dataset_dir ../../data \ --template default \ --finetuning_type lora \ diff --git a/examples/lora_single_gpu/reward.sh b/examples/lora_single_gpu/reward.sh index 0f775926..1212d082 100644 --- a/examples/lora_single_gpu/reward.sh +++ b/examples/lora_single_gpu/reward.sh @@ -6,7 +6,7 @@ CUDA_VISIBLE_DEVICES=0 python ../../src/train_bash.py \ --model_name_or_path meta-llama/Llama-2-7b-hf \ --adapter_name_or_path ../../saves/LLaMA2-7B/lora/sft \ --create_new_adapter \ - --dataset comparison_gpt4_en \ + --dataset orca_rlhf \ --dataset_dir ../../data \ --template default \ --finetuning_type lora \