diff --git a/examples/deepspeed/ds_z0_config.json b/examples/deepspeed/ds_z0_config.json
index b7826b20..ed326676 100644
--- a/examples/deepspeed/ds_z0_config.json
+++ b/examples/deepspeed/ds_z0_config.json
@@ -14,5 +14,15 @@
   },
   "bf16": {
     "enabled": "auto"
+  },
+  "zero_optimization": {
+    "stage": 0,
+    "allgather_partitions": true,
+    "allgather_bucket_size": 5e8,
+    "overlap_comm": true,
+    "reduce_scatter": true,
+    "reduce_bucket_size": 5e8,
+    "contiguous_gradients": true,
+    "round_robin_gradients": true
   }
-}
\ No newline at end of file
+}
diff --git a/examples/lora_multi_npu/ds_zero0.sh b/examples/lora_multi_npu/ds_zero0.sh
index f849c5c9..4ffaa1b0 100644
--- a/examples/lora_multi_npu/ds_zero0.sh
+++ b/examples/lora_multi_npu/ds_zero0.sh
@@ -12,4 +12,4 @@ ASCEND_RT_VISIBLE_DEVICES=0,1,2,3 torchrun \
     --node_rank $RANK \
     --master_addr $MASTER_ADDR \
     --master_port $MASTER_PORT \
-    src/train.py examples/lora_multi_gpu/llama3_lora_sft_ds.yaml
+    src/train.py examples/lora_multi_npu/llama3_lora_sft_ds.yaml