diff --git a/results/inference/Baichuan2-7B/Baichuan2_predict_1.yaml b/results/inference/Baichuan2-7B/Baichuan2_predict_1.yaml
index f12fd9ff..bafb3f5f 100644
--- a/results/inference/Baichuan2-7B/Baichuan2_predict_1.yaml
+++ b/results/inference/Baichuan2-7B/Baichuan2_predict_1.yaml
@@ -11,6 +11,7 @@ cutoff_len: 1024
 max_samples: 50
 overwrite_cache: true
 preprocessing_num_workers: 16
+include_tokens_per_second: true
 
 ### output
 output_dir: ./results/inference/Baichuan2-7B/Baichuan2_predict_1
diff --git a/results/inference/Baichuan2-7B/Baichuan2_predict_1_single.yaml b/results/inference/Baichuan2-7B/Baichuan2_predict_1_single.yaml
index 9ac8bb3c..5a9b7a04 100644
--- a/results/inference/Baichuan2-7B/Baichuan2_predict_1_single.yaml
+++ b/results/inference/Baichuan2-7B/Baichuan2_predict_1_single.yaml
@@ -11,6 +11,7 @@ cutoff_len: 1024
 max_samples: 50
 overwrite_cache: true
 preprocessing_num_workers: 16
+include_tokens_per_second: true
 
 ### output
 output_dir: ./results/inference/Baichuan2-7B/Baichuan2_predict_1_single
diff --git a/results/inference/Baichuan2-7B/Baichuan2_predict_2.yaml b/results/inference/Baichuan2-7B/Baichuan2_predict_2.yaml
index 86c072fc..4e0c2737 100644
--- a/results/inference/Baichuan2-7B/Baichuan2_predict_2.yaml
+++ b/results/inference/Baichuan2-7B/Baichuan2_predict_2.yaml
@@ -11,6 +11,7 @@ cutoff_len: 1024
 max_samples: 50
 overwrite_cache: true
 preprocessing_num_workers: 16
+include_tokens_per_second: true
 
 ### output
 output_dir: ./results/inference/Baichuan2-7B/Baichuan2_predict_2
diff --git a/results/inference/Baichuan2-7B/Baichuan2_predict_2_single.yaml b/results/inference/Baichuan2-7B/Baichuan2_predict_2_single.yaml
index 22bddb02..e4c8d1ab 100644
--- a/results/inference/Baichuan2-7B/Baichuan2_predict_2_single.yaml
+++ b/results/inference/Baichuan2-7B/Baichuan2_predict_2_single.yaml
@@ -11,6 +11,7 @@ cutoff_len: 1024
 max_samples: 50
 overwrite_cache: true
 preprocessing_num_workers: 16
+include_tokens_per_second: true
 
 ### output
 output_dir: ./results/inference/Baichuan2-7B/Baichuan2_predict_2_single
diff --git a/results/inference/Baichuan2-7B/Baichuan2_predict_3.yaml b/results/inference/Baichuan2-7B/Baichuan2_predict_3.yaml
index 78c1d1b7..deb9bd93 100644
--- a/results/inference/Baichuan2-7B/Baichuan2_predict_3.yaml
+++ b/results/inference/Baichuan2-7B/Baichuan2_predict_3.yaml
@@ -11,6 +11,7 @@ cutoff_len: 1024
 max_samples: 50
 overwrite_cache: true
 preprocessing_num_workers: 16
+include_tokens_per_second: true
 
 ### output
 output_dir: ./results/inference/Baichuan2-7B/Baichuan2_predict_3
diff --git a/results/inference/Baichuan2-7B/Baichuan2_predict_3_single.yaml b/results/inference/Baichuan2-7B/Baichuan2_predict_3_single.yaml
index 99f9305a..f0af7bbd 100644
--- a/results/inference/Baichuan2-7B/Baichuan2_predict_3_single.yaml
+++ b/results/inference/Baichuan2-7B/Baichuan2_predict_3_single.yaml
@@ -11,6 +11,7 @@ cutoff_len: 1024
 max_samples: 50
 overwrite_cache: true
 preprocessing_num_workers: 16
+include_tokens_per_second: true
 
 ### output
 output_dir: ./results/inference/Baichuan2-7B/Baichuan2_predict_3_single
diff --git a/results/inference/ChatGLM2-6B/ChatGLM2_predict_1.yaml b/results/inference/ChatGLM2-6B/ChatGLM2_predict_1.yaml
index 418a91cd..8ac7c624 100644
--- a/results/inference/ChatGLM2-6B/ChatGLM2_predict_1.yaml
+++ b/results/inference/ChatGLM2-6B/ChatGLM2_predict_1.yaml
@@ -1,5 +1,5 @@
 ### model
-model_name_or_path: ZhipuAI/chatglm2-6b
+model_name_or_path: ../../llm/chatglm/data
 
 ### method
 do_predict: true
@@ -11,6 +11,7 @@ cutoff_len: 1024
 max_samples: 50
 overwrite_cache: true
 preprocessing_num_workers: 16
+include_tokens_per_second: true
 
 ### output
 output_dir: ./results/inference/ChatGLM2-6B/ChatGLM2_predict_1
diff --git a/results/inference/ChatGLM2-6B/ChatGLM2_predict_1_single.yaml b/results/inference/ChatGLM2-6B/ChatGLM2_predict_1_single.yaml
index bc336d1e..c5243e05 100644
--- a/results/inference/ChatGLM2-6B/ChatGLM2_predict_1_single.yaml
+++ b/results/inference/ChatGLM2-6B/ChatGLM2_predict_1_single.yaml
@@ -1,5 +1,5 @@
 ### model
-model_name_or_path: ZhipuAI/chatglm2-6b
+model_name_or_path: ../../llm/chatglm/data
 
 ### method
 do_predict: true
@@ -11,6 +11,7 @@ cutoff_len: 1024
 max_samples: 50
 overwrite_cache: true
 preprocessing_num_workers: 16
+include_tokens_per_second: true
 
 ### output
 output_dir: ./results/inference/ChatGLM2-6B/ChatGLM2_predict_1_single
diff --git a/results/inference/ChatGLM2-6B/ChatGLM2_predict_2.yaml b/results/inference/ChatGLM2-6B/ChatGLM2_predict_2.yaml
index e0376290..5acef859 100644
--- a/results/inference/ChatGLM2-6B/ChatGLM2_predict_2.yaml
+++ b/results/inference/ChatGLM2-6B/ChatGLM2_predict_2.yaml
@@ -1,5 +1,5 @@
 ### model
-model_name_or_path: ZhipuAI/chatglm2-6b
+model_name_or_path: ../../llm/chatglm/data
 
 ### method
 do_predict: true
@@ -11,6 +11,7 @@ cutoff_len: 1024
 max_samples: 50
 overwrite_cache: true
 preprocessing_num_workers: 16
+include_tokens_per_second: true
 
 ### output
 output_dir: ./results/inference/ChatGLM2-6B/ChatGLM2_predict_2
diff --git a/results/inference/ChatGLM2-6B/ChatGLM2_predict_2_single.yaml b/results/inference/ChatGLM2-6B/ChatGLM2_predict_2_single.yaml
index 8e2f51fd..16bae771 100644
--- a/results/inference/ChatGLM2-6B/ChatGLM2_predict_2_single.yaml
+++ b/results/inference/ChatGLM2-6B/ChatGLM2_predict_2_single.yaml
@@ -1,5 +1,5 @@
 ### model
-model_name_or_path: ZhipuAI/chatglm2-6b
+model_name_or_path: ../../llm/chatglm/data
 
 ### method
 do_predict: true
@@ -11,6 +11,7 @@ cutoff_len: 1024
 max_samples: 50
 overwrite_cache: true
 preprocessing_num_workers: 16
+include_tokens_per_second: true
 
 ### output
 output_dir: ./results/inference/ChatGLM2-6B/ChatGLM2_predict_2_single
diff --git a/results/inference/ChatGLM2-6B/ChatGLM2_predict_3.yaml b/results/inference/ChatGLM2-6B/ChatGLM2_predict_3.yaml
index 7b40f6a7..fd99631b 100644
--- a/results/inference/ChatGLM2-6B/ChatGLM2_predict_3.yaml
+++ b/results/inference/ChatGLM2-6B/ChatGLM2_predict_3.yaml
@@ -1,5 +1,5 @@
 ### model
-model_name_or_path: ZhipuAI/chatglm2-6b
+model_name_or_path: ../../llm/chatglm/data
 
 ### method
 do_predict: true
@@ -11,6 +11,7 @@ cutoff_len: 1024
 max_samples: 50
 overwrite_cache: true
 preprocessing_num_workers: 16
+include_tokens_per_second: true
 
 ### output
 output_dir: ./results/inference/ChatGLM2-6B/ChatGLM2_predict_3
diff --git a/results/inference/ChatGLM2-6B/ChatGLM2_predict_3_single.yaml b/results/inference/ChatGLM2-6B/ChatGLM2_predict_3_single.yaml
index 4e9f3a15..bc7b7355 100644
--- a/results/inference/ChatGLM2-6B/ChatGLM2_predict_3_single.yaml
+++ b/results/inference/ChatGLM2-6B/ChatGLM2_predict_3_single.yaml
@@ -1,5 +1,5 @@
 ### model
-model_name_or_path: ZhipuAI/chatglm2-6b
+model_name_or_path: ../../llm/chatglm/data
 
 ### method
 do_predict: true
@@ -11,6 +11,7 @@ cutoff_len: 1024
 max_samples: 50
 overwrite_cache: true
 preprocessing_num_workers: 16
+include_tokens_per_second: true
 
 ### output
 output_dir: ./results/inference/ChatGLM2-6B/ChatGLM2_predict_3_single
diff --git a/results/inference/Llama2-7B/llama2_predict_1.yaml b/results/inference/Llama2-7B/llama2_predict_1.yaml
index 9e4ce664..2098094a 100644
--- a/results/inference/Llama2-7B/llama2_predict_1.yaml
+++ b/results/inference/Llama2-7B/llama2_predict_1.yaml
@@ -11,6 +11,7 @@ cutoff_len: 1024
 max_samples: 50
 overwrite_cache: true
 preprocessing_num_workers: 16
+include_tokens_per_second: true
 
 ### output
 output_dir: ./results/inference/Llama2-7B/llama2_predict_1
diff --git a/results/inference/Llama2-7B/llama2_predict_1_single.yaml b/results/inference/Llama2-7B/llama2_predict_1_single.yaml
index c355038b..b915be91 100644
--- a/results/inference/Llama2-7B/llama2_predict_1_single.yaml
+++ b/results/inference/Llama2-7B/llama2_predict_1_single.yaml
@@ -11,6 +11,7 @@ cutoff_len: 1024
 max_samples: 50
 overwrite_cache: true
 preprocessing_num_workers: 16
+include_tokens_per_second: true
 
 ### output
 output_dir: ./results/inference/Llama2-7B/llama2_predict_1_single
diff --git a/results/inference/Llama2-7B/llama2_predict_2.yaml b/results/inference/Llama2-7B/llama2_predict_2.yaml
index 2348a14d..a40f6938 100644
--- a/results/inference/Llama2-7B/llama2_predict_2.yaml
+++ b/results/inference/Llama2-7B/llama2_predict_2.yaml
@@ -11,6 +11,7 @@ cutoff_len: 1024
 max_samples: 50
 overwrite_cache: true
 preprocessing_num_workers: 16
+include_tokens_per_second: true
 
 ### output
 output_dir: ./results/inference/Llama2-7B/llama2_predict_2
diff --git a/results/inference/Llama2-7B/llama2_predict_2_single.yaml b/results/inference/Llama2-7B/llama2_predict_2_single.yaml
index a04fba54..152e2571 100644
--- a/results/inference/Llama2-7B/llama2_predict_2_single.yaml
+++ b/results/inference/Llama2-7B/llama2_predict_2_single.yaml
@@ -11,6 +11,7 @@ cutoff_len: 1024
 max_samples: 50
 overwrite_cache: true
 preprocessing_num_workers: 16
+include_tokens_per_second: true
 
 ### output
 output_dir: ./results/inference/Llama2-7B/llama2_predict_2_single
diff --git a/results/inference/Llama2-7B/llama2_predict_3.yaml b/results/inference/Llama2-7B/llama2_predict_3.yaml
index c68f306a..2aee434d 100644
--- a/results/inference/Llama2-7B/llama2_predict_3.yaml
+++ b/results/inference/Llama2-7B/llama2_predict_3.yaml
@@ -11,6 +11,7 @@ cutoff_len: 1024
 max_samples: 50
 overwrite_cache: true
 preprocessing_num_workers: 16
+include_tokens_per_second: true
 
 ### output
 output_dir: ./results/inference/Llama2-7B/llama2_predict_3
diff --git a/results/inference/Llama2-7B/llama2_predict_3_single.yaml b/results/inference/Llama2-7B/llama2_predict_3_single.yaml
index 37c6cca1..5f2dcc1e 100644
--- a/results/inference/Llama2-7B/llama2_predict_3_single.yaml
+++ b/results/inference/Llama2-7B/llama2_predict_3_single.yaml
@@ -11,6 +11,7 @@ cutoff_len: 1024
 max_samples: 50
 overwrite_cache: true
 preprocessing_num_workers: 16
+include_tokens_per_second: true
 
 ### output
 output_dir: ./results/inference/Llama2-7B/llama2_predict_3_single
diff --git a/results/inference/Qwen-7B/Qwen_predict_1.yaml b/results/inference/Qwen-7B/Qwen_predict_1.yaml
index 22fc846a..46dc0764 100644
--- a/results/inference/Qwen-7B/Qwen_predict_1.yaml
+++ b/results/inference/Qwen-7B/Qwen_predict_1.yaml
@@ -11,6 +11,7 @@ cutoff_len: 1024
 max_samples: 50
 overwrite_cache: true
 preprocessing_num_workers: 16
+include_tokens_per_second: true
 
 ### output
 output_dir: ./results/inference/Qwen-7B/Qwen_predict_1
diff --git a/results/inference/Qwen-7B/Qwen_predict_1_single.yaml b/results/inference/Qwen-7B/Qwen_predict_1_single.yaml
index 6d372817..09acdbae 100644
--- a/results/inference/Qwen-7B/Qwen_predict_1_single.yaml
+++ b/results/inference/Qwen-7B/Qwen_predict_1_single.yaml
@@ -11,6 +11,7 @@ cutoff_len: 1024
 max_samples: 50
 overwrite_cache: true
 preprocessing_num_workers: 16
+include_tokens_per_second: true
 
 ### output
 output_dir: ./results/inference/Qwen-7B/Qwen_predict_1_single
diff --git a/results/inference/Qwen-7B/Qwen_predict_2.yaml b/results/inference/Qwen-7B/Qwen_predict_2.yaml
index c6e936b2..1c92a601 100644
--- a/results/inference/Qwen-7B/Qwen_predict_2.yaml
+++ b/results/inference/Qwen-7B/Qwen_predict_2.yaml
@@ -11,6 +11,7 @@ cutoff_len: 1024
 max_samples: 50
 overwrite_cache: true
 preprocessing_num_workers: 16
+include_tokens_per_second: true
 
 ### output
 output_dir: ./results/inference/Qwen-7B/Qwen_predict_2
diff --git a/results/inference/Qwen-7B/Qwen_predict_2_single.yaml b/results/inference/Qwen-7B/Qwen_predict_2_single.yaml
index 69a26e2f..a4fc4b81 100644
--- a/results/inference/Qwen-7B/Qwen_predict_2_single.yaml
+++ b/results/inference/Qwen-7B/Qwen_predict_2_single.yaml
@@ -11,6 +11,7 @@ cutoff_len: 1024
 max_samples: 50
 overwrite_cache: true
 preprocessing_num_workers: 16
+include_tokens_per_second: true
 
 ### output
 output_dir: ./results/inference/Qwen-7B/Qwen_predict_2_single
diff --git a/results/inference/Qwen-7B/Qwen_predict_3.yaml b/results/inference/Qwen-7B/Qwen_predict_3.yaml
index e4014295..806e629e 100644
--- a/results/inference/Qwen-7B/Qwen_predict_3.yaml
+++ b/results/inference/Qwen-7B/Qwen_predict_3.yaml
@@ -11,6 +11,7 @@ cutoff_len: 1024
 max_samples: 50
 overwrite_cache: true
 preprocessing_num_workers: 16
+include_tokens_per_second: true
 
 ### output
 output_dir: ./results/inference/Qwen-7B/Qwen_predict_3
diff --git a/results/inference/Qwen-7B/Qwen_predict_3_single.yaml b/results/inference/Qwen-7B/Qwen_predict_3_single.yaml
index 3935c3e9..593d8d7e 100644
--- a/results/inference/Qwen-7B/Qwen_predict_3_single.yaml
+++ b/results/inference/Qwen-7B/Qwen_predict_3_single.yaml
@@ -11,6 +11,7 @@ cutoff_len: 1024
 max_samples: 50
 overwrite_cache: true
 preprocessing_num_workers: 16
+include_tokens_per_second: true
 
 ### output
 output_dir: ./results/inference/Qwen-7B/Qwen_predict_3_single
diff --git a/src/llamafactory/train/sft/workflow.py b/src/llamafactory/train/sft/workflow.py
index 5da99557..345bcd5d 100644
--- a/src/llamafactory/train/sft/workflow.py
+++ b/src/llamafactory/train/sft/workflow.py
@@ -15,6 +15,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+import time
 from typing import TYPE_CHECKING, List, Optional
 
 from ...data import SFTDataCollatorWith4DAttentionMask, get_dataset
@@ -112,7 +113,9 @@ def run_sft(
 
     # Predict
     if training_args.do_predict:
+        print("predict start time: " + time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))
         predict_results = trainer.predict(dataset_module["eval_dataset"], metric_key_prefix="predict", **gen_kwargs)
+        print("predict end time: " + time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))
         if training_args.predict_with_generate:  # predict_loss will be wrong if predict_with_generate is enabled
             predict_results.metrics.pop("predict_loss", None)
         trainer.log_metrics("predict", predict_results.metrics)