diff --git a/examples/README.md b/examples/README.md
index ce19f9d1..0838314a 100644
--- a/examples/README.md
+++ b/examples/README.md
@@ -28,6 +28,12 @@ CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/lora_single_gpu/llama3_lo
 CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/lora_single_gpu/llama3_lora_sft.yaml
 ```
 
+#### Multimodal Supervised Fine-Tuning
+
+```bash
+CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/lora_single_gpu/llava1_5_lora_sft.yaml
+```
+
 #### Reward Modeling
 
 ```bash
@@ -52,12 +58,6 @@ CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/lora_single_gpu/llama3_lo
 CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/lora_single_gpu/llama3_lora_orpo.yaml
 ```
 
-#### Multimodal Supervised Fine-Tuning
-
-```bash
-CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/lora_single_gpu/llava1_5_lora_sft.yaml
-```
-
 #### Preprocess Dataset
 
 It is useful for large dataset, use `tokenized_path` in config to load the preprocessed dataset.
diff --git a/examples/README_zh.md b/examples/README_zh.md
index 91bdcda9..7fe43954 100644
--- a/examples/README_zh.md
+++ b/examples/README_zh.md
@@ -28,6 +28,12 @@ CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/lora_single_gpu/llama3_lo
 CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/lora_single_gpu/llama3_lora_sft.yaml
 ```
 
+#### 多模态指令监督微调
+
+```bash
+CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/lora_single_gpu/llava1_5_lora_sft.yaml
+```
+
 #### 奖励模型训练
 
 ```bash
@@ -52,12 +58,6 @@ CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/lora_single_gpu/llama3_lo
 CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/lora_single_gpu/llama3_lora_orpo.yaml
 ```
 
-#### 多模态指令监督微调
-
-```bash
-CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/lora_single_gpu/llava1_5_lora_sft.yaml
-```
-
 #### 预处理数据集
 
 对于大数据集有帮助，在配置中使用 `tokenized_path` 以加载预处理后的数据集。
diff --git a/examples/extras/badam/llama3_lora_sft.yaml b/examples/extras/badam/llama3_lora_sft.yaml
index 9f1f1976..5e8994bc 100644
--- a/examples/extras/badam/llama3_lora_sft.yaml
+++ b/examples/extras/badam/llama3_lora_sft.yaml
@@ -15,7 +15,6 @@ dataset: identity,alpaca_gpt4_en
 template: llama3
 cutoff_len: 1024
 max_samples: 1000
-val_size: 0.1
 overwrite_cache: true
 preprocessing_num_workers: 16
 
@@ -36,6 +35,7 @@ warmup_steps: 0.1
 pure_bf16: true
 
 # eval
+val_size: 0.1
 per_device_eval_batch_size: 1
 evaluation_strategy: steps
 eval_steps: 500
diff --git a/examples/extras/fsdp_qlora/llama3_lora_sft.yaml b/examples/extras/fsdp_qlora/llama3_lora_sft.yaml
index 64bf1356..1fd8f16a 100644
--- a/examples/extras/fsdp_qlora/llama3_lora_sft.yaml
+++ b/examples/extras/fsdp_qlora/llama3_lora_sft.yaml
@@ -8,12 +8,14 @@ do_train: true
 finetuning_type: lora
 lora_target: q_proj,v_proj
 
+# ddp
+ddp_timeout: 180000000
+
 # dataset
 dataset: identity,alpaca_gpt4_en
 template: llama3
 cutoff_len: 1024
 max_samples: 1000
-val_size: 0.1
 overwrite_cache: true
 preprocessing_num_workers: 16
 
@@ -34,6 +36,7 @@ warmup_steps: 0.1
 fp16: true
 
 # eval
+val_size: 0.1
 per_device_eval_batch_size: 1
 evaluation_strategy: steps
 eval_steps: 500
diff --git a/examples/extras/galore/llama3_full_sft.yaml b/examples/extras/galore/llama3_full_sft.yaml
index 5aec8af9..3bc074c5 100644
--- a/examples/extras/galore/llama3_full_sft.yaml
+++ b/examples/extras/galore/llama3_full_sft.yaml
@@ -16,7 +16,6 @@ dataset: identity,alpaca_gpt4_en
 template: llama3
 cutoff_len: 1024
 max_samples: 1000
-val_size: 0.1
 overwrite_cache: true
 preprocessing_num_workers: 16
 
@@ -37,6 +36,7 @@ warmup_steps: 0.1
 pure_bf16: true
 
 # eval
+val_size: 0.1
 per_device_eval_batch_size: 1
 evaluation_strategy: steps
 eval_steps: 500
diff --git a/examples/extras/llama_pro/llama3_freeze_sft.yaml b/examples/extras/llama_pro/llama3_freeze_sft.yaml
index a54be8b8..4d92cdad 100644
--- a/examples/extras/llama_pro/llama3_freeze_sft.yaml
+++ b/examples/extras/llama_pro/llama3_freeze_sft.yaml
@@ -14,7 +14,6 @@ dataset: identity,alpaca_gpt4_en
 template: llama3
 cutoff_len: 1024
 max_samples: 1000
-val_size: 0.1
 overwrite_cache: true
 preprocessing_num_workers: 16
 
@@ -32,9 +31,10 @@ learning_rate: 0.0001
 num_train_epochs: 3.0
 lr_scheduler_type: cosine
 warmup_steps: 0.1
-pure_bf16: true
+fp16: true
 
 # eval
+val_size: 0.1
 per_device_eval_batch_size: 1
 evaluation_strategy: steps
 eval_steps: 500
diff --git a/examples/extras/loraplus/llama3_lora_sft.yaml b/examples/extras/loraplus/llama3_lora_sft.yaml
index dfb7058b..0956aa71 100644
--- a/examples/extras/loraplus/llama3_lora_sft.yaml
+++ b/examples/extras/loraplus/llama3_lora_sft.yaml
@@ -13,7 +13,6 @@ dataset: identity,alpaca_gpt4_en
 template: llama3
 cutoff_len: 1024
 max_samples: 1000
-val_size: 0.1
 overwrite_cache: true
 preprocessing_num_workers: 16
 
@@ -31,9 +30,10 @@ learning_rate: 0.0001
 num_train_epochs: 3.0
 lr_scheduler_type: cosine
 warmup_steps: 0.1
-pure_bf16: true
+fp16: true
 
 # eval
+val_size: 0.1
 per_device_eval_batch_size: 1
 evaluation_strategy: steps
 eval_steps: 500
diff --git a/examples/extras/mod/llama3_full_sft.yaml b/examples/extras/mod/llama3_full_sft.yaml
index 5f80521d..5dc8c061 100644
--- a/examples/extras/mod/llama3_full_sft.yaml
+++ b/examples/extras/mod/llama3_full_sft.yaml
@@ -12,7 +12,6 @@ dataset: identity,alpaca_gpt4_en
 template: llama3
 cutoff_len: 1024
 max_samples: 1000
-val_size: 0.1
 overwrite_cache: true
 preprocessing_num_workers: 16
 
@@ -34,6 +33,7 @@ warmup_steps: 0.1
 pure_bf16: true
 
 # eval
+val_size: 0.1
 per_device_eval_batch_size: 1
 evaluation_strategy: steps
 eval_steps: 500
diff --git a/examples/full_multi_gpu/llama3_full_sft.yaml b/examples/full_multi_gpu/llama3_full_sft.yaml
index ef35e441..2d8031f1 100644
--- a/examples/full_multi_gpu/llama3_full_sft.yaml
+++ b/examples/full_multi_gpu/llama3_full_sft.yaml
@@ -15,7 +15,6 @@ dataset: identity,alpaca_gpt4_en
 template: llama3
 cutoff_len: 1024
 max_samples: 1000
-val_size: 0.1
 overwrite_cache: true
 preprocessing_num_workers: 16
 
@@ -36,6 +35,7 @@ warmup_steps: 0.1
 fp16: true
 
 # eval
+val_size: 0.1
 per_device_eval_batch_size: 1
 evaluation_strategy: steps
 eval_steps: 500
diff --git a/examples/lora_multi_gpu/llama3_lora_sft.yaml b/examples/lora_multi_gpu/llama3_lora_sft.yaml
index d9690679..6cc06f8a 100644
--- a/examples/lora_multi_gpu/llama3_lora_sft.yaml
+++ b/examples/lora_multi_gpu/llama3_lora_sft.yaml
@@ -15,7 +15,6 @@ dataset: identity,alpaca_gpt4_en
 template: llama3
 cutoff_len: 1024
 max_samples: 1000
-val_size: 0.1
 overwrite_cache: true
 preprocessing_num_workers: 16
 
@@ -36,6 +35,7 @@ warmup_steps: 0.1
 fp16: true
 
 # eval
+val_size: 0.1
 per_device_eval_batch_size: 1
 evaluation_strategy: steps
 eval_steps: 500
diff --git a/examples/lora_multi_gpu/llama3_lora_sft_ds.yaml b/examples/lora_multi_gpu/llama3_lora_sft_ds.yaml
index 26955167..5a7348c1 100644
--- a/examples/lora_multi_gpu/llama3_lora_sft_ds.yaml
+++ b/examples/lora_multi_gpu/llama3_lora_sft_ds.yaml
@@ -16,7 +16,6 @@ dataset: identity,alpaca_gpt4_en
 template: llama3
 cutoff_len: 1024
 max_samples: 1000
-val_size: 0.1
 overwrite_cache: true
 preprocessing_num_workers: 16
 
@@ -37,6 +36,7 @@ warmup_steps: 0.1
 fp16: true
 
 # eval
+val_size: 0.1
 per_device_eval_batch_size: 1
 evaluation_strategy: steps
 eval_steps: 500
diff --git a/examples/lora_single_gpu/llama3_lora_dpo.yaml b/examples/lora_single_gpu/llama3_lora_dpo.yaml
index f71f752d..16c6d0c9 100644
--- a/examples/lora_single_gpu/llama3_lora_dpo.yaml
+++ b/examples/lora_single_gpu/llama3_lora_dpo.yaml
@@ -13,7 +13,6 @@ dataset: orca_rlhf
 template: llama3
 cutoff_len: 1024
 max_samples: 1000
-val_size: 0.1
 overwrite_cache: true
 preprocessing_num_workers: 16
 
@@ -34,6 +33,7 @@ warmup_steps: 0.1
 fp16: true
 
 # eval
+val_size: 0.1
 per_device_eval_batch_size: 1
 evaluation_strategy: steps
 eval_steps: 500
diff --git a/examples/lora_single_gpu/llama3_lora_orpo.yaml b/examples/lora_single_gpu/llama3_lora_orpo.yaml
index 5d78d260..bc42bdd4 100644
--- a/examples/lora_single_gpu/llama3_lora_orpo.yaml
+++ b/examples/lora_single_gpu/llama3_lora_orpo.yaml
@@ -12,7 +12,6 @@ dataset: orca_rlhf
 template: llama3
 cutoff_len: 1024
 max_samples: 1000
-val_size: 0.1
 overwrite_cache: true
 preprocessing_num_workers: 16
 
@@ -33,6 +32,7 @@ warmup_steps: 0.1
 fp16: true
 
 # eval
+val_size: 0.1
 per_device_eval_batch_size: 1
 evaluation_strategy: steps
 eval_steps: 500
diff --git a/examples/lora_single_gpu/llama3_lora_pretrain.yaml b/examples/lora_single_gpu/llama3_lora_pretrain.yaml
index 64245b71..48425b15 100644
--- a/examples/lora_single_gpu/llama3_lora_pretrain.yaml
+++ b/examples/lora_single_gpu/llama3_lora_pretrain.yaml
@@ -11,7 +11,6 @@ lora_target: q_proj,v_proj
 dataset: c4_demo
 cutoff_len: 1024
 max_samples: 1000
-val_size: 0.1
 overwrite_cache: true
 preprocessing_num_workers: 16
 
@@ -32,6 +31,7 @@ warmup_steps: 0.1
 fp16: true
 
 # eval
+val_size: 0.1
 per_device_eval_batch_size: 1
 evaluation_strategy: steps
 eval_steps: 500
diff --git a/examples/lora_single_gpu/llama3_lora_reward.yaml b/examples/lora_single_gpu/llama3_lora_reward.yaml
index f190f4ac..ecaf8d72 100644
--- a/examples/lora_single_gpu/llama3_lora_reward.yaml
+++ b/examples/lora_single_gpu/llama3_lora_reward.yaml
@@ -12,7 +12,6 @@ dataset: orca_rlhf
 template: llama3
 cutoff_len: 1024
 max_samples: 1000
-val_size: 0.1
 overwrite_cache: true
 preprocessing_num_workers: 16
 
@@ -33,6 +32,7 @@ warmup_steps: 0.1
 fp16: true
 
 # eval
+val_size: 0.1
 per_device_eval_batch_size: 1
 evaluation_strategy: steps
 eval_steps: 500
diff --git a/examples/lora_single_gpu/llama3_lora_sft.yaml b/examples/lora_single_gpu/llama3_lora_sft.yaml
index f99df305..0e5e30b3 100644
--- a/examples/lora_single_gpu/llama3_lora_sft.yaml
+++ b/examples/lora_single_gpu/llama3_lora_sft.yaml
@@ -12,7 +12,6 @@ dataset: identity,alpaca_gpt4_en
 template: llama3
 cutoff_len: 1024
 max_samples: 1000
-val_size: 0.1
 overwrite_cache: true
 preprocessing_num_workers: 16
 
@@ -33,6 +32,7 @@ warmup_steps: 0.1
 fp16: true
 
 # eval
+val_size: 0.1
 per_device_eval_batch_size: 1
 evaluation_strategy: steps
 eval_steps: 500
diff --git a/examples/lora_single_gpu/llama3_preprocess.yaml b/examples/lora_single_gpu/llama3_preprocess.yaml
index 0b3dc599..4c45c1cd 100644
--- a/examples/lora_single_gpu/llama3_preprocess.yaml
+++ b/examples/lora_single_gpu/llama3_preprocess.yaml
@@ -12,7 +12,6 @@ dataset: identity,alpaca_gpt4_en
 template: llama3
 cutoff_len: 1024
 max_samples: 1000
-val_size: 0.1
 overwrite_cache: true
 preprocessing_num_workers: 16
 tokenized_path: saves/llama3-8b/dataset/sft
diff --git a/examples/lora_single_gpu/llava1_5_lora_sft.yaml b/examples/lora_single_gpu/llava1_5_lora_sft.yaml
index 96c2701a..84d2a672 100644
--- a/examples/lora_single_gpu/llava1_5_lora_sft.yaml
+++ b/examples/lora_single_gpu/llava1_5_lora_sft.yaml
@@ -13,7 +13,6 @@ dataset: mllm_demo
 template: vicuna
 cutoff_len: 1024
 max_samples: 1000
-val_size: 0.1
 overwrite_cache: true
 preprocessing_num_workers: 16
 
@@ -34,6 +33,7 @@ warmup_steps: 0.1
 fp16: true
 
 # eval
+val_size: 0.1
 per_device_eval_batch_size: 1
 evaluation_strategy: steps
 eval_steps: 500
diff --git a/examples/qlora_single_gpu/llama3_lora_sft_aqlm.yaml b/examples/qlora_single_gpu/llama3_lora_sft_aqlm.yaml
index 11f1d277..a1d5f95d 100644
--- a/examples/qlora_single_gpu/llama3_lora_sft_aqlm.yaml
+++ b/examples/qlora_single_gpu/llama3_lora_sft_aqlm.yaml
@@ -12,7 +12,6 @@ dataset: identity,alpaca_gpt4_en
 template: llama3
 cutoff_len: 1024
 max_samples: 1000
-val_size: 0.1
 overwrite_cache: true
 preprocessing_num_workers: 16
 
@@ -33,6 +32,7 @@ warmup_steps: 0.1
 fp16: true
 
 # eval
+val_size: 0.1
 per_device_eval_batch_size: 1
 evaluation_strategy: steps
 eval_steps: 500
diff --git a/examples/qlora_single_gpu/llama3_lora_sft_awq.yaml b/examples/qlora_single_gpu/llama3_lora_sft_awq.yaml
index 4b070d45..8941d6b2 100644
--- a/examples/qlora_single_gpu/llama3_lora_sft_awq.yaml
+++ b/examples/qlora_single_gpu/llama3_lora_sft_awq.yaml
@@ -12,7 +12,6 @@ dataset: identity,alpaca_gpt4_en
 template: llama3
 cutoff_len: 1024
 max_samples: 1000
-val_size: 0.1
 overwrite_cache: true
 preprocessing_num_workers: 16
 
@@ -33,6 +32,7 @@ warmup_steps: 0.1
 fp16: true
 
 # eval
+val_size: 0.1
 per_device_eval_batch_size: 1
 evaluation_strategy: steps
 eval_steps: 500
diff --git a/examples/qlora_single_gpu/llama3_lora_sft_bitsandbytes.yaml b/examples/qlora_single_gpu/llama3_lora_sft_bitsandbytes.yaml
index 7bc31bde..885fcd83 100644
--- a/examples/qlora_single_gpu/llama3_lora_sft_bitsandbytes.yaml
+++ b/examples/qlora_single_gpu/llama3_lora_sft_bitsandbytes.yaml
@@ -8,15 +8,11 @@ do_train: true
 finetuning_type: lora
 lora_target: q_proj,v_proj
 
-# ddp
-ddp_timeout: 180000000
-
 # dataset
 dataset: identity,alpaca_gpt4_en
 template: llama3
 cutoff_len: 1024
 max_samples: 1000
-val_size: 0.1
 overwrite_cache: true
 preprocessing_num_workers: 16
 
@@ -37,6 +33,7 @@ warmup_steps: 0.1
 fp16: true
 
 # eval
+val_size: 0.1
 per_device_eval_batch_size: 1
 evaluation_strategy: steps
 eval_steps: 500
diff --git a/examples/qlora_single_gpu/llama3_lora_sft_gptq.yaml b/examples/qlora_single_gpu/llama3_lora_sft_gptq.yaml
index 2f8cfe45..87a404a0 100644
--- a/examples/qlora_single_gpu/llama3_lora_sft_gptq.yaml
+++ b/examples/qlora_single_gpu/llama3_lora_sft_gptq.yaml
@@ -12,7 +12,6 @@ dataset: identity,alpaca_gpt4_en
 template: llama3
 cutoff_len: 1024
 max_samples: 1000
-val_size: 0.1
 overwrite_cache: true
 preprocessing_num_workers: 16
 
@@ -33,6 +32,7 @@ warmup_steps: 0.1
 fp16: true
 
 # eval
+val_size: 0.1
 per_device_eval_batch_size: 1
 evaluation_strategy: steps
 eval_steps: 500