diff --git a/results/inference/Llama2-7B/llama2_predict_1.log b/results/inference/Llama2-7B/llama2_predict_1.log
new file mode 100644
index 00000000..d53622a4
--- /dev/null
+++ b/results/inference/Llama2-7B/llama2_predict_1.log
@@ -0,0 +1,58 @@
+08/18/2024 17:34:57 - INFO - llamafactory.cli - Initializing distributed tasks at: 127.0.0.1:23898
+08/18/2024 17:35:04 - WARNING - llamafactory.hparams.parser - `ddp_find_unused_parameters` needs to be set as False for LoRA in DDP training.
+08/18/2024 17:35:04 - INFO - llamafactory.hparams.parser - Process rank: 6, device: cuda:6, n_gpu: 1, distributed training: True, compute dtype: None
+08/18/2024 17:35:05 - WARNING - llamafactory.hparams.parser - `ddp_find_unused_parameters` needs to be set as False for LoRA in DDP training.
+08/18/2024 17:35:05 - INFO - llamafactory.hparams.parser - Process rank: 4, device: cuda:4, n_gpu: 1, distributed training: True, compute dtype: None
+08/18/2024 17:35:05 - WARNING - llamafactory.hparams.parser - `ddp_find_unused_parameters` needs to be set as False for LoRA in DDP training.
+08/18/2024 17:35:05 - INFO - llamafactory.hparams.parser - Process rank: 5, device: cuda:5, n_gpu: 1, distributed training: True, compute dtype: None
+08/18/2024 17:35:05 - WARNING - llamafactory.hparams.parser - `ddp_find_unused_parameters` needs to be set as False for LoRA in DDP training.
+08/18/2024 17:35:05 - INFO - llamafactory.hparams.parser - Process rank: 1, device: cuda:1, n_gpu: 1, distributed training: True, compute dtype: None
+08/18/2024 17:35:05 - WARNING - llamafactory.hparams.parser - `ddp_find_unused_parameters` needs to be set as False for LoRA in DDP training.
+08/18/2024 17:35:05 - INFO - llamafactory.hparams.parser - Process rank: 0, device: cuda:0, n_gpu: 1, distributed training: True, compute dtype: None
+08/18/2024 17:35:05 - WARNING - llamafactory.hparams.parser - `ddp_find_unused_parameters` needs to be set as False for LoRA in DDP training.
+08/18/2024 17:35:05 - INFO - llamafactory.hparams.parser - Process rank: 3, device: cuda:3, n_gpu: 1, distributed training: True, compute dtype: None
+08/18/2024 17:35:05 - WARNING - llamafactory.hparams.parser - `ddp_find_unused_parameters` needs to be set as False for LoRA in DDP training.
+08/18/2024 17:35:05 - INFO - llamafactory.hparams.parser - Process rank: 2, device: cuda:2, n_gpu: 1, distributed training: True, compute dtype: None
+08/18/2024 17:35:06 - INFO - llamafactory.data.loader - Loading dataset llamafactory/alpaca_gpt4_zh...
+08/18/2024 17:35:16 - INFO - llamafactory.data.loader - Loading dataset llamafactory/alpaca_gpt4_zh...
+08/18/2024 17:35:16 - INFO - llamafactory.data.loader - Loading dataset llamafactory/alpaca_gpt4_zh...
+08/18/2024 17:35:16 - INFO - llamafactory.data.loader - Loading dataset llamafactory/alpaca_gpt4_zh...
+08/18/2024 17:35:16 - INFO - llamafactory.data.loader - Loading dataset llamafactory/alpaca_gpt4_zh...
+08/18/2024 17:35:16 - INFO - llamafactory.data.loader - Loading dataset llamafactory/alpaca_gpt4_zh...
+08/18/2024 17:35:16 - INFO - llamafactory.data.loader - Loading dataset llamafactory/alpaca_gpt4_zh...
+eval example:
+input_ids:
+[1, 518, 25580, 29962, 29871, 30982, 31695, 31863, 31577, 30210, 30457, 30502, 31302, 30858, 30267, 518, 29914, 25580, 29962]
+inputs:
+<s> [INST] 保持健康的三个提示。 [/INST]
+08/18/2024 17:35:34 - INFO - llamafactory.model.patcher - Using KV cache for faster generation.
+08/18/2024 17:35:34 - INFO - llamafactory.model.patcher - Using KV cache for faster generation.
+08/18/2024 17:35:34 - INFO - llamafactory.model.patcher - Using KV cache for faster generation.
+08/18/2024 17:35:34 - INFO - llamafactory.model.patcher - Using KV cache for faster generation.
+08/18/2024 17:35:34 - INFO - llamafactory.model.patcher - Using KV cache for faster generation.
+08/18/2024 17:35:34 - INFO - llamafactory.model.patcher - Using KV cache for faster generation.
+08/18/2024 17:35:34 - INFO - llamafactory.model.patcher - Using KV cache for faster generation.
+08/18/2024 17:36:13 - INFO - llamafactory.model.model_utils.attention - Using torch SDPA for faster training and inference.
+08/18/2024 17:36:13 - INFO - llamafactory.model.loader - all params: 6,738,415,616
+08/18/2024 17:36:13 - INFO - llamafactory.model.model_utils.attention - Using torch SDPA for faster training and inference.
+08/18/2024 17:36:13 - INFO - llamafactory.model.loader - all params: 6,738,415,616
+08/18/2024 17:36:14 - INFO - llamafactory.model.model_utils.attention - Using torch SDPA for faster training and inference.
+08/18/2024 17:36:14 - INFO - llamafactory.model.model_utils.attention - Using torch SDPA for faster training and inference.
+08/18/2024 17:36:14 - INFO - llamafactory.model.model_utils.attention - Using torch SDPA for faster training and inference.
+08/18/2024 17:36:14 - INFO - llamafactory.model.model_utils.attention - Using torch SDPA for faster training and inference.
+08/18/2024 17:36:14 - INFO - llamafactory.model.loader - all params: 6,738,415,616
+08/18/2024 17:36:14 - INFO - llamafactory.model.loader - all params: 6,738,415,616
+08/18/2024 17:36:14 - INFO - llamafactory.model.loader - all params: 6,738,415,616
+08/18/2024 17:36:14 - INFO - llamafactory.model.loader - all params: 6,738,415,616
+08/18/2024 17:36:14 - INFO - llamafactory.model.model_utils.attention - Using torch SDPA for faster training and inference.
+08/18/2024 17:36:14 - INFO - llamafactory.model.loader - all params: 6,738,415,616
+***** predict metrics *****
+  predict_bleu-4                 =     1.4552
+  predict_model_preparation_time =     0.0031
+  predict_rouge-1                =    11.0984
+  predict_rouge-2                =     3.0446
+  predict_rouge-l                =     5.0312
+  predict_runtime                = 0:01:03.50
+  predict_samples_per_second     =      0.787
+  predict_steps_per_second       =      0.063
+08/18/2024 17:37:18 - INFO - llamafactory.train.sft.trainer - Saving prediction results to ./results/inference/Llama2-7B/llama2_predict_1/generated_predictions.jsonl
diff --git a/results/inference/Llama2-7B/llama2_predict_1_single.log b/results/inference/Llama2-7B/llama2_predict_1_single.log
new file mode 100644
index 00000000..2853a6bf
--- /dev/null
+++ b/results/inference/Llama2-7B/llama2_predict_1_single.log
@@ -0,0 +1,20 @@
+08/18/2024 17:42:59 - INFO - llamafactory.hparams.parser - Process rank: 0, device: cuda:0, n_gpu: 1, distributed training: False, compute dtype: None
+08/18/2024 17:43:00 - INFO - llamafactory.data.loader - Loading dataset llamafactory/alpaca_gpt4_zh...
+eval example:
+input_ids:
+[1, 518, 25580, 29962, 29871, 30982, 31695, 31863, 31577, 30210, 30457, 30502, 31302, 30858, 30267, 518, 29914, 25580, 29962]
+inputs:
+<s> [INST] 保持健康的三个提示。 [/INST]
+08/18/2024 17:43:05 - INFO - llamafactory.model.patcher - Using KV cache for faster generation.
+08/18/2024 17:43:34 - INFO - llamafactory.model.model_utils.attention - Using torch SDPA for faster training and inference.
+08/18/2024 17:43:34 - INFO - llamafactory.model.loader - all params: 6,738,415,616
+***** predict metrics *****
+  predict_bleu-4                 =     1.2952
+  predict_model_preparation_time =     0.0029
+  predict_rouge-1                =    12.8381
+  predict_rouge-2                =     3.2551
+  predict_rouge-l                =     5.1021
+  predict_runtime                = 0:06:01.55
+  predict_samples_per_second     =      0.138
+  predict_steps_per_second       =      0.069
+08/18/2024 17:49:36 - INFO - llamafactory.train.sft.trainer - Saving prediction results to ./results/inference/Llama2-7B/llama2_predict_1_single/generated_predictions.jsonl
diff --git a/results/inference/Llama2-7B/llama2_predict_2.log b/results/inference/Llama2-7B/llama2_predict_2.log
new file mode 100644
index 00000000..f80b641d
--- /dev/null
+++ b/results/inference/Llama2-7B/llama2_predict_2.log
@@ -0,0 +1,58 @@
+08/18/2024 17:37:25 - INFO - llamafactory.cli - Initializing distributed tasks at: 127.0.0.1:28639
+08/18/2024 17:37:31 - WARNING - llamafactory.hparams.parser - `ddp_find_unused_parameters` needs to be set as False for LoRA in DDP training.
+08/18/2024 17:37:31 - INFO - llamafactory.hparams.parser - Process rank: 1, device: cuda:1, n_gpu: 1, distributed training: True, compute dtype: None
+08/18/2024 17:37:32 - WARNING - llamafactory.hparams.parser - `ddp_find_unused_parameters` needs to be set as False for LoRA in DDP training.
+08/18/2024 17:37:32 - INFO - llamafactory.hparams.parser - Process rank: 0, device: cuda:0, n_gpu: 1, distributed training: True, compute dtype: None
+08/18/2024 17:37:32 - WARNING - llamafactory.hparams.parser - `ddp_find_unused_parameters` needs to be set as False for LoRA in DDP training.
+08/18/2024 17:37:32 - INFO - llamafactory.hparams.parser - Process rank: 6, device: cuda:6, n_gpu: 1, distributed training: True, compute dtype: None
+08/18/2024 17:37:33 - WARNING - llamafactory.hparams.parser - `ddp_find_unused_parameters` needs to be set as False for LoRA in DDP training.
+08/18/2024 17:37:33 - INFO - llamafactory.hparams.parser - Process rank: 2, device: cuda:2, n_gpu: 1, distributed training: True, compute dtype: None
+08/18/2024 17:37:33 - WARNING - llamafactory.hparams.parser - `ddp_find_unused_parameters` needs to be set as False for LoRA in DDP training.
+08/18/2024 17:37:33 - INFO - llamafactory.hparams.parser - Process rank: 3, device: cuda:3, n_gpu: 1, distributed training: True, compute dtype: None
+08/18/2024 17:37:33 - WARNING - llamafactory.hparams.parser - `ddp_find_unused_parameters` needs to be set as False for LoRA in DDP training.
+08/18/2024 17:37:33 - INFO - llamafactory.hparams.parser - Process rank: 4, device: cuda:4, n_gpu: 1, distributed training: True, compute dtype: None
+08/18/2024 17:37:33 - WARNING - llamafactory.hparams.parser - `ddp_find_unused_parameters` needs to be set as False for LoRA in DDP training.
+08/18/2024 17:37:33 - INFO - llamafactory.hparams.parser - Process rank: 5, device: cuda:5, n_gpu: 1, distributed training: True, compute dtype: None
+08/18/2024 17:37:33 - INFO - llamafactory.data.loader - Loading dataset llamafactory/alpaca_gpt4_zh...
+08/18/2024 17:38:38 - INFO - llamafactory.data.loader - Loading dataset llamafactory/alpaca_gpt4_zh...
+08/18/2024 17:38:38 - INFO - llamafactory.data.loader - Loading dataset llamafactory/alpaca_gpt4_zh...
+08/18/2024 17:38:38 - INFO - llamafactory.data.loader - Loading dataset llamafactory/alpaca_gpt4_zh...
+08/18/2024 17:38:38 - INFO - llamafactory.data.loader - Loading dataset llamafactory/alpaca_gpt4_zh...
+08/18/2024 17:38:38 - INFO - llamafactory.data.loader - Loading dataset llamafactory/alpaca_gpt4_zh...
+08/18/2024 17:38:38 - INFO - llamafactory.data.loader - Loading dataset llamafactory/alpaca_gpt4_zh...
+eval example:
+input_ids:
+[1, 518, 25580, 29962, 29871, 30982, 31695, 31863, 31577, 30210, 30457, 30502, 31302, 30858, 30267, 518, 29914, 25580, 29962]
+inputs:
+<s> [INST] 保持健康的三个提示。 [/INST]
+08/18/2024 17:38:44 - INFO - llamafactory.model.patcher - Using KV cache for faster generation.
+08/18/2024 17:38:44 - INFO - llamafactory.model.patcher - Using KV cache for faster generation.
+08/18/2024 17:38:44 - INFO - llamafactory.model.patcher - Using KV cache for faster generation.
+08/18/2024 17:38:44 - INFO - llamafactory.model.patcher - Using KV cache for faster generation.
+08/18/2024 17:38:44 - INFO - llamafactory.model.patcher - Using KV cache for faster generation.
+08/18/2024 17:38:44 - INFO - llamafactory.model.patcher - Using KV cache for faster generation.
+08/18/2024 17:38:44 - INFO - llamafactory.model.patcher - Using KV cache for faster generation.
+08/18/2024 17:39:22 - INFO - llamafactory.model.model_utils.attention - Using torch SDPA for faster training and inference.
+08/18/2024 17:39:22 - INFO - llamafactory.model.loader - all params: 6,738,415,616
+08/18/2024 17:39:23 - INFO - llamafactory.model.model_utils.attention - Using torch SDPA for faster training and inference.
+08/18/2024 17:39:23 - INFO - llamafactory.model.loader - all params: 6,738,415,616
+08/18/2024 17:39:25 - INFO - llamafactory.model.model_utils.attention - Using torch SDPA for faster training and inference.
+08/18/2024 17:39:25 - INFO - llamafactory.model.loader - all params: 6,738,415,616
+08/18/2024 17:39:25 - INFO - llamafactory.model.model_utils.attention - Using torch SDPA for faster training and inference.
+08/18/2024 17:39:25 - INFO - llamafactory.model.model_utils.attention - Using torch SDPA for faster training and inference.
+08/18/2024 17:39:25 - INFO - llamafactory.model.loader - all params: 6,738,415,616
+08/18/2024 17:39:25 - INFO - llamafactory.model.loader - all params: 6,738,415,616
+08/18/2024 17:39:25 - INFO - llamafactory.model.model_utils.attention - Using torch SDPA for faster training and inference.
+08/18/2024 17:39:25 - INFO - llamafactory.model.loader - all params: 6,738,415,616
+08/18/2024 17:39:25 - INFO - llamafactory.model.model_utils.attention - Using torch SDPA for faster training and inference.
+08/18/2024 17:39:25 - INFO - llamafactory.model.loader - all params: 6,738,415,616
+***** predict metrics *****
+  predict_bleu-4                 =     1.4552
+  predict_model_preparation_time =      0.003
+  predict_rouge-1                =    11.0984
+  predict_rouge-2                =     3.0446
+  predict_rouge-l                =     5.0312
+  predict_runtime                = 0:01:03.40
+  predict_samples_per_second     =      0.789
+  predict_steps_per_second       =      0.063
+08/18/2024 17:40:28 - INFO - llamafactory.train.sft.trainer - Saving prediction results to ./results/inference/Llama2-7B/llama2_predict_2/generated_predictions.jsonl
diff --git a/results/inference/Llama2-7B/llama2_predict_2_single.log b/results/inference/Llama2-7B/llama2_predict_2_single.log
new file mode 100644
index 00000000..ffb6573b
--- /dev/null
+++ b/results/inference/Llama2-7B/llama2_predict_2_single.log
@@ -0,0 +1,20 @@
+08/18/2024 17:49:41 - INFO - llamafactory.hparams.parser - Process rank: 0, device: cuda:0, n_gpu: 1, distributed training: False, compute dtype: None
+08/18/2024 17:49:42 - INFO - llamafactory.data.loader - Loading dataset llamafactory/alpaca_gpt4_zh...
+eval example:
+input_ids:
+[1, 518, 25580, 29962, 29871, 30982, 31695, 31863, 31577, 30210, 30457, 30502, 31302, 30858, 30267, 518, 29914, 25580, 29962]
+inputs:
+<s> [INST] 保持健康的三个提示。 [/INST]
+08/18/2024 17:49:47 - INFO - llamafactory.model.patcher - Using KV cache for faster generation.
+08/18/2024 17:50:16 - INFO - llamafactory.model.model_utils.attention - Using torch SDPA for faster training and inference.
+08/18/2024 17:50:16 - INFO - llamafactory.model.loader - all params: 6,738,415,616
+***** predict metrics *****
+  predict_bleu-4                 =     1.2952
+  predict_model_preparation_time =      0.003
+  predict_rouge-1                =    12.8381
+  predict_rouge-2                =     3.2551
+  predict_rouge-l                =     5.1021
+  predict_runtime                = 0:06:01.42
+  predict_samples_per_second     =      0.138
+  predict_steps_per_second       =      0.069
+08/18/2024 17:56:18 - INFO - llamafactory.train.sft.trainer - Saving prediction results to ./results/inference/Llama2-7B/llama2_predict_2_single/generated_predictions.jsonl
diff --git a/results/inference/Llama2-7B/llama2_predict_3.log b/results/inference/Llama2-7B/llama2_predict_3.log
new file mode 100644
index 00000000..d7e7b8e7
--- /dev/null
+++ b/results/inference/Llama2-7B/llama2_predict_3.log
@@ -0,0 +1,58 @@
+08/18/2024 17:40:35 - INFO - llamafactory.cli - Initializing distributed tasks at: 127.0.0.1:24151
+08/18/2024 17:40:42 - WARNING - llamafactory.hparams.parser - `ddp_find_unused_parameters` needs to be set as False for LoRA in DDP training.
+08/18/2024 17:40:42 - INFO - llamafactory.hparams.parser - Process rank: 2, device: cuda:2, n_gpu: 1, distributed training: True, compute dtype: None
+08/18/2024 17:40:43 - WARNING - llamafactory.hparams.parser - `ddp_find_unused_parameters` needs to be set as False for LoRA in DDP training.
+08/18/2024 17:40:43 - INFO - llamafactory.hparams.parser - Process rank: 5, device: cuda:5, n_gpu: 1, distributed training: True, compute dtype: None
+08/18/2024 17:40:43 - WARNING - llamafactory.hparams.parser - `ddp_find_unused_parameters` needs to be set as False for LoRA in DDP training.
+08/18/2024 17:40:43 - INFO - llamafactory.hparams.parser - Process rank: 6, device: cuda:6, n_gpu: 1, distributed training: True, compute dtype: None
+08/18/2024 17:40:43 - WARNING - llamafactory.hparams.parser - `ddp_find_unused_parameters` needs to be set as False for LoRA in DDP training.
+08/18/2024 17:40:43 - INFO - llamafactory.hparams.parser - Process rank: 4, device: cuda:4, n_gpu: 1, distributed training: True, compute dtype: None
+08/18/2024 17:40:43 - WARNING - llamafactory.hparams.parser - `ddp_find_unused_parameters` needs to be set as False for LoRA in DDP training.
+08/18/2024 17:40:43 - INFO - llamafactory.hparams.parser - Process rank: 0, device: cuda:0, n_gpu: 1, distributed training: True, compute dtype: None
+08/18/2024 17:40:43 - WARNING - llamafactory.hparams.parser - `ddp_find_unused_parameters` needs to be set as False for LoRA in DDP training.
+08/18/2024 17:40:43 - INFO - llamafactory.hparams.parser - Process rank: 1, device: cuda:1, n_gpu: 1, distributed training: True, compute dtype: None
+08/18/2024 17:40:43 - WARNING - llamafactory.hparams.parser - `ddp_find_unused_parameters` needs to be set as False for LoRA in DDP training.
+08/18/2024 17:40:43 - INFO - llamafactory.hparams.parser - Process rank: 3, device: cuda:3, n_gpu: 1, distributed training: True, compute dtype: None
+08/18/2024 17:40:44 - INFO - llamafactory.data.loader - Loading dataset llamafactory/alpaca_gpt4_zh...
+08/18/2024 17:40:54 - INFO - llamafactory.data.loader - Loading dataset llamafactory/alpaca_gpt4_zh...
+08/18/2024 17:40:54 - INFO - llamafactory.data.loader - Loading dataset llamafactory/alpaca_gpt4_zh...
+08/18/2024 17:40:54 - INFO - llamafactory.data.loader - Loading dataset llamafactory/alpaca_gpt4_zh...
+08/18/2024 17:40:54 - INFO - llamafactory.data.loader - Loading dataset llamafactory/alpaca_gpt4_zh...
+08/18/2024 17:40:54 - INFO - llamafactory.data.loader - Loading dataset llamafactory/alpaca_gpt4_zh...
+08/18/2024 17:40:54 - INFO - llamafactory.data.loader - Loading dataset llamafactory/alpaca_gpt4_zh...
+eval example:
+input_ids:
+[1, 518, 25580, 29962, 29871, 30982, 31695, 31863, 31577, 30210, 30457, 30502, 31302, 30858, 30267, 518, 29914, 25580, 29962]
+inputs:
+<s> [INST] 保持健康的三个提示。 [/INST]
+08/18/2024 17:41:09 - INFO - llamafactory.model.patcher - Using KV cache for faster generation.
+08/18/2024 17:41:09 - INFO - llamafactory.model.patcher - Using KV cache for faster generation.
+08/18/2024 17:41:09 - INFO - llamafactory.model.patcher - Using KV cache for faster generation.
+08/18/2024 17:41:09 - INFO - llamafactory.model.patcher - Using KV cache for faster generation.
+08/18/2024 17:41:09 - INFO - llamafactory.model.patcher - Using KV cache for faster generation.
+08/18/2024 17:41:09 - INFO - llamafactory.model.patcher - Using KV cache for faster generation.
+08/18/2024 17:41:09 - INFO - llamafactory.model.patcher - Using KV cache for faster generation.
+08/18/2024 17:41:48 - INFO - llamafactory.model.model_utils.attention - Using torch SDPA for faster training and inference.
+08/18/2024 17:41:48 - INFO - llamafactory.model.loader - all params: 6,738,415,616
+08/18/2024 17:41:48 - INFO - llamafactory.model.model_utils.attention - Using torch SDPA for faster training and inference.
+08/18/2024 17:41:48 - INFO - llamafactory.model.model_utils.attention - Using torch SDPA for faster training and inference.
+08/18/2024 17:41:48 - INFO - llamafactory.model.loader - all params: 6,738,415,616
+08/18/2024 17:41:48 - INFO - llamafactory.model.loader - all params: 6,738,415,616
+08/18/2024 17:41:48 - INFO - llamafactory.model.model_utils.attention - Using torch SDPA for faster training and inference.
+08/18/2024 17:41:48 - INFO - llamafactory.model.loader - all params: 6,738,415,616
+08/18/2024 17:41:48 - INFO - llamafactory.model.model_utils.attention - Using torch SDPA for faster training and inference.
+08/18/2024 17:41:48 - INFO - llamafactory.model.loader - all params: 6,738,415,616
+08/18/2024 17:41:48 - INFO - llamafactory.model.model_utils.attention - Using torch SDPA for faster training and inference.
+08/18/2024 17:41:48 - INFO - llamafactory.model.loader - all params: 6,738,415,616
+08/18/2024 17:41:49 - INFO - llamafactory.model.model_utils.attention - Using torch SDPA for faster training and inference.
+08/18/2024 17:41:49 - INFO - llamafactory.model.loader - all params: 6,738,415,616
+***** predict metrics *****
+  predict_bleu-4                 =     1.4552
+  predict_model_preparation_time =     0.0031
+  predict_rouge-1                =    11.0984
+  predict_rouge-2                =     3.0446
+  predict_rouge-l                =     5.0312
+  predict_runtime                = 0:01:03.96
+  predict_samples_per_second     =      0.782
+  predict_steps_per_second       =      0.063
+08/18/2024 17:42:52 - INFO - llamafactory.train.sft.trainer - Saving prediction results to ./results/inference/Llama2-7B/llama2_predict_3/generated_predictions.jsonl
diff --git a/results/inference/Llama2-7B/llama2_predict_3_single.log b/results/inference/Llama2-7B/llama2_predict_3_single.log
new file mode 100644
index 00000000..b425c65b
--- /dev/null
+++ b/results/inference/Llama2-7B/llama2_predict_3_single.log
@@ -0,0 +1,20 @@
+08/18/2024 17:56:23 - INFO - llamafactory.hparams.parser - Process rank: 0, device: cuda:0, n_gpu: 1, distributed training: False, compute dtype: None
+08/18/2024 17:56:24 - INFO - llamafactory.data.loader - Loading dataset llamafactory/alpaca_gpt4_zh...
+eval example:
+input_ids:
+[1, 518, 25580, 29962, 29871, 30982, 31695, 31863, 31577, 30210, 30457, 30502, 31302, 30858, 30267, 518, 29914, 25580, 29962]
+inputs:
+<s> [INST] 保持健康的三个提示。 [/INST]
+08/18/2024 17:56:29 - INFO - llamafactory.model.patcher - Using KV cache for faster generation.
+08/18/2024 17:56:58 - INFO - llamafactory.model.model_utils.attention - Using torch SDPA for faster training and inference.
+08/18/2024 17:56:58 - INFO - llamafactory.model.loader - all params: 6,738,415,616
+***** predict metrics *****
+  predict_bleu-4                 =     1.2952
+  predict_model_preparation_time =      0.003
+  predict_rouge-1                =    12.8381
+  predict_rouge-2                =     3.2551
+  predict_rouge-l                =     5.1021
+  predict_runtime                = 0:06:01.71
+  predict_samples_per_second     =      0.138
+  predict_steps_per_second       =      0.069
+08/18/2024 18:03:00 - INFO - llamafactory.train.sft.trainer - Saving prediction results to ./results/inference/Llama2-7B/llama2_predict_3_single/generated_predictions.jsonl