Merge branch 'hiyouga:main' into main
This commit is contained in:
commit
cde4dfe569
|
@ -19,11 +19,11 @@ examples/
|
||||||
├── lora_multi_gpu/
|
├── lora_multi_gpu/
|
||||||
│ ├── single_node.sh: Fine-tune model with Accelerate on single node using LoRA
|
│ ├── single_node.sh: Fine-tune model with Accelerate on single node using LoRA
|
||||||
│ ├── multi_node.sh: Fine-tune model with Accelerate on multiple nodes using LoRA
|
│ ├── multi_node.sh: Fine-tune model with Accelerate on multiple nodes using LoRA
|
||||||
│ └── ds_zero3.sh: Fine-tune model with DeepSpeed ZeRO-3 using LoRA
|
│ └── ds_zero3.sh: Fine-tune model with DeepSpeed ZeRO-3 using LoRA (weight sharding)
|
||||||
├── full_multi_gpu/
|
├── full_multi_gpu/
|
||||||
│ ├── single_node.sh: Full fine-tune model with DeepSpeed on single node
|
│ ├── single_node.sh: Full fine-tune model with DeepSpeed on single node
|
||||||
│ ├── multi_node.sh: Full fine-tune model with DeepSpeed on multiple nodes
|
│ ├── multi_node.sh: Full fine-tune model with DeepSpeed on multiple nodes
|
||||||
│ └── predict.sh: Do batch predict and compute BLEU and ROUGE scores after full tuning
|
│ └── predict.sh: Do parallel batch predict and compute BLEU and ROUGE scores after full tuning
|
||||||
├── merge_lora/
|
├── merge_lora/
|
||||||
│ ├── merge.sh: Merge LoRA weights into the pre-trained models
|
│ ├── merge.sh: Merge LoRA weights into the pre-trained models
|
||||||
│ └── quantize.sh: Quantize the fine-tuned model with AutoGPTQ
|
│ └── quantize.sh: Quantize the fine-tuned model with AutoGPTQ
|
||||||
|
|
|
@ -19,11 +19,11 @@ examples/
|
||||||
├── lora_multi_gpu/
|
├── lora_multi_gpu/
|
||||||
│ ├── single_node.sh: 使用 Accelerate 进行单节点 LoRA 训练
|
│ ├── single_node.sh: 使用 Accelerate 进行单节点 LoRA 训练
|
||||||
│ ├── multi_node.sh: 使用 Accelerate 进行多节点 LoRA 训练
|
│ ├── multi_node.sh: 使用 Accelerate 进行多节点 LoRA 训练
|
||||||
│ └── ds_zero3.sh: 使用 DeepSpeed ZeRO-3 进行 LoRA 训练
|
│ └── ds_zero3.sh: 使用 DeepSpeed ZeRO-3 进行 LoRA 训练(拆分权重)
|
||||||
├── full_multi_gpu/
|
├── full_multi_gpu/
|
||||||
│ ├── single_node.sh: 使用 DeepSpeed 进行单节点全量训练
|
│ ├── single_node.sh: 使用 DeepSpeed 进行单节点全量训练
|
||||||
│ ├── multi_node.sh: 使用 DeepSpeed 进行多节点全量训练
|
│ ├── multi_node.sh: 使用 DeepSpeed 进行多节点全量训练
|
||||||
│ └── predict.sh: 基于全量训练进行批量预测并计算 BLEU 和 ROUGE 分数
|
│ └── predict.sh: 基于全量训练进行多卡批量预测并计算 BLEU 和 ROUGE 分数
|
||||||
├── merge_lora/
|
├── merge_lora/
|
||||||
│ ├── merge.sh: 将 LoRA 权重合并到预训练模型中
|
│ ├── merge.sh: 将 LoRA 权重合并到预训练模型中
|
||||||
│ └── quantize.sh: 使用 AutoGPTQ 量化微调后的模型
|
│ └── quantize.sh: 使用 AutoGPTQ 量化微调后的模型
|
||||||
|
|
|
@ -9,7 +9,7 @@ main_process_port: 29555
|
||||||
main_training_function: main
|
main_training_function: main
|
||||||
mixed_precision: fp16
|
mixed_precision: fp16
|
||||||
num_machines: 2 # the number of nodes
|
num_machines: 2 # the number of nodes
|
||||||
num_processes: 16 # the number of GPUs in all nodes
|
num_processes: 8 # the number of GPUs in all nodes
|
||||||
rdzv_backend: static
|
rdzv_backend: static
|
||||||
same_network: true
|
same_network: true
|
||||||
tpu_env: []
|
tpu_env: []
|
||||||
|
|
|
@ -9,7 +9,7 @@ main_process_port: 29555
|
||||||
main_training_function: main
|
main_training_function: main
|
||||||
mixed_precision: fp16
|
mixed_precision: fp16
|
||||||
num_machines: 2 # the number of nodes
|
num_machines: 2 # the number of nodes
|
||||||
num_processes: 16 # the number of GPUs in all nodes
|
num_processes: 8 # the number of GPUs in all nodes
|
||||||
rdzv_backend: static
|
rdzv_backend: static
|
||||||
same_network: true
|
same_network: true
|
||||||
tpu_env: []
|
tpu_env: []
|
||||||
|
|
|
@ -1,6 +1,8 @@
|
||||||
#!/bin/bash
|
#!/bin/bash
|
||||||
|
|
||||||
CUDA_VISIBLE_DEVICES=0 python ../../src/train_bash.py \
|
CUDA_VISIBLE_DEVICES=0,1,2,3 accelerate launch \
|
||||||
|
--config_file ../accelerate/single_config.yaml \
|
||||||
|
../../src/train_bash.py \
|
||||||
--stage sft \
|
--stage sft \
|
||||||
--do_predict \
|
--do_predict \
|
||||||
--model_name_or_path ../../saves/LLaMA2-7B/full/sft \
|
--model_name_or_path ../../saves/LLaMA2-7B/full/sft \
|
||||||
|
|
|
@ -1,4 +1,5 @@
|
||||||
#!/bin/bash
|
#!/bin/bash
|
||||||
|
# also launch it on slave machine using slave_config.yaml
|
||||||
|
|
||||||
CUDA_VISIBLE_DEVICES=0,1,2,3 accelerate launch \
|
CUDA_VISIBLE_DEVICES=0,1,2,3 accelerate launch \
|
||||||
--config_file ../accelerate/master_config.yaml \
|
--config_file ../accelerate/master_config.yaml \
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
#!/bin/bash
|
#!/bin/bash
|
||||||
|
|
||||||
CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 accelerate launch \
|
CUDA_VISIBLE_DEVICES=0,1,2,3 accelerate launch \
|
||||||
--config_file ../accelerate/single_config.yaml \
|
--config_file ../accelerate/single_config.yaml \
|
||||||
../../src/train_bash.py \
|
../../src/train_bash.py \
|
||||||
--stage sft \
|
--stage sft \
|
||||||
|
|
Loading…
Reference in New Issue