From 7e69e71a52c736d0e42afbf61a3b3c22db606bc2 Mon Sep 17 00:00:00 2001
From: hiyouga
Date: Wed, 15 May 2024 00:26:10 +0800
Subject: [PATCH] fix examples

---
 examples/deepspeed/ds_z0_config.json | 10 ++++++++++
 examples/lora_multi_npu/ds_zero0.sh  |  2 +-
 2 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/examples/deepspeed/ds_z0_config.json b/examples/deepspeed/ds_z0_config.json
index b7826b20..ed326676 100644
--- a/examples/deepspeed/ds_z0_config.json
+++ b/examples/deepspeed/ds_z0_config.json
@@ -14,5 +14,15 @@
   },
   "bf16": {
     "enabled": "auto"
+  },
+  "zero_optimization": {
+    "stage": 0,
+    "allgather_partitions": true,
+    "allgather_bucket_size": 5e8,
+    "overlap_comm": true,
+    "reduce_scatter": true,
+    "reduce_bucket_size": 5e8,
+    "contiguous_gradients": true,
+    "round_robin_gradients": true
   }
 }
\ No newline at end of file
diff --git a/examples/lora_multi_npu/ds_zero0.sh b/examples/lora_multi_npu/ds_zero0.sh
index f849c5c9..4ffaa1b0 100644
--- a/examples/lora_multi_npu/ds_zero0.sh
+++ b/examples/lora_multi_npu/ds_zero0.sh
@@ -12,4 +12,4 @@ ASCEND_RT_VISIBLE_DEVICES=0,1,2,3 torchrun \
     --node_rank $RANK \
     --master_addr $MASTER_ADDR \
     --master_port $MASTER_PORT \
-    src/train.py examples/lora_multi_gpu/llama3_lora_sft_ds.yaml
+    src/train.py examples/lora_multi_npu/llama3_lora_sft_ds.yaml