Patch summary (leading text before the first "diff --git" header is skipped by patch tools):
  * examples/deepspeed/ds_z0_config.json: add an explicit "zero_optimization" section with "stage": 0.
  * examples/lora_multi_npu/ds_zero0.sh: launch with the lora_multi_npu YAML instead of the lora_multi_gpu one.
NOTE(review): line breaks were restored from a flattened copy of this patch; the indentation
(2-space JSON, 4-space shell continuation lines) is assumed - confirm against the target files.

diff --git a/examples/deepspeed/ds_z0_config.json b/examples/deepspeed/ds_z0_config.json
index b7826b20..ed326676 100644
--- a/examples/deepspeed/ds_z0_config.json
+++ b/examples/deepspeed/ds_z0_config.json
@@ -14,5 +14,15 @@
   },
   "bf16": {
     "enabled": "auto"
+  },
+  "zero_optimization": {
+    "stage": 0,
+    "allgather_partitions": true,
+    "allgather_bucket_size": 5e8,
+    "overlap_comm": true,
+    "reduce_scatter": true,
+    "reduce_bucket_size": 5e8,
+    "contiguous_gradients": true,
+    "round_robin_gradients": true
   }
 }
\ No newline at end of file
diff --git a/examples/lora_multi_npu/ds_zero0.sh b/examples/lora_multi_npu/ds_zero0.sh
index f849c5c9..4ffaa1b0 100644
--- a/examples/lora_multi_npu/ds_zero0.sh
+++ b/examples/lora_multi_npu/ds_zero0.sh
@@ -12,4 +12,4 @@ ASCEND_RT_VISIBLE_DEVICES=0,1,2,3 torchrun \
     --node_rank $RANK \
     --master_addr $MASTER_ADDR \
     --master_port $MASTER_PORT \
-    src/train.py examples/lora_multi_gpu/llama3_lora_sft_ds.yaml
+    src/train.py examples/lora_multi_npu/llama3_lora_sft_ds.yaml