diff --git a/src/llmtuner/tuner/sft/metric.py b/src/llmtuner/tuner/sft/metric.py
index 3f13f3c7..8e67cc79 100644
--- a/src/llmtuner/tuner/sft/metric.py
+++ b/src/llmtuner/tuner/sft/metric.py
@@ -23,7 +23,7 @@ class ComputeMetrics:
         Uses the model predictions to compute metrics.
         """
         preds, labels = eval_preds
-        score_dict = {"rouge-1": [], "rouge-2": [], "rouge-l": [], "bleu-4": []}
+        score_dict = {"accuracy": [], "rouge-1": [], "rouge-2": [], "rouge-l": [], "bleu-4": []}
 
         preds = np.where(preds != IGNORE_INDEX, preds, self.tokenizer.pad_token_id)
         labels = np.where(labels != IGNORE_INDEX, labels, self.tokenizer.pad_token_id)
@@ -47,5 +47,6 @@ class ComputeMetrics:
 
             bleu_score = sentence_bleu([list(label)], list(pred), smoothing_function=SmoothingFunction().method3)
             score_dict["bleu-4"].append(round(bleu_score * 100, 4))
+            score_dict["accuracy"].append(float(len(label) != 0 and pred[:len(label)] == label))
 
         return {k: float(np.mean(v)) for k, v in score_dict.items()}
diff --git a/src/llmtuner/webui/components/eval.py b/src/llmtuner/webui/components/eval.py
index 82d7f658..9e958e1a 100644
--- a/src/llmtuner/webui/components/eval.py
+++ b/src/llmtuner/webui/components/eval.py
@@ -21,8 +21,10 @@ def create_eval_tab(top_elems: Dict[str, Component], runner: Runner) -> Dict[str
     preview_btn.click(get_preview, [dataset_dir, dataset], [preview_count, preview_samples, preview_box])
 
     with gr.Row():
+        max_source_length = gr.Slider(value=512, minimum=4, maximum=4096, step=1)
+        max_target_length = gr.Slider(value=512, minimum=4, maximum=4096, step=1)
         max_samples = gr.Textbox(value="100000")
-        batch_size = gr.Slider(value=8, minimum=1, maximum=128, step=1)
+        batch_size = gr.Slider(value=8, minimum=1, maximum=512, step=1)
         predict = gr.Checkbox(value=True)
 
     with gr.Row():
@@ -43,6 +45,8 @@ def create_eval_tab(top_elems: Dict[str, Component], runner: Runner) -> Dict[str
             top_elems["source_prefix"],
             dataset_dir,
             dataset,
+            max_source_length,
+            max_target_length,
             max_samples,
             batch_size,
             predict
@@ -58,6 +62,8 @@ def create_eval_tab(top_elems: Dict[str, Component], runner: Runner) -> Dict[str
         preview_count=preview_count,
         preview_samples=preview_samples,
         close_btn=close_btn,
+        max_source_length=max_source_length,
+        max_target_length=max_target_length,
         max_samples=max_samples,
         batch_size=batch_size,
         predict=predict,
diff --git a/src/llmtuner/webui/components/sft.py b/src/llmtuner/webui/components/sft.py
index b0e785da..60d86c9f 100644
--- a/src/llmtuner/webui/components/sft.py
+++ b/src/llmtuner/webui/components/sft.py
@@ -23,21 +23,24 @@ def create_sft_tab(top_elems: Dict[str, Component], runner: Runner) -> Dict[str,
     preview_btn.click(get_preview, [dataset_dir, dataset], [preview_count, preview_samples, preview_box])
 
     with gr.Row():
+        max_source_length = gr.Slider(value=512, minimum=4, maximum=4096, step=1)
+        max_target_length = gr.Slider(value=512, minimum=4, maximum=4096, step=1)
         learning_rate = gr.Textbox(value="5e-5")
         num_train_epochs = gr.Textbox(value="3.0")
         max_samples = gr.Textbox(value="100000")
 
     with gr.Row():
-        batch_size = gr.Slider(value=4, minimum=1, maximum=128, step=1)
-        gradient_accumulation_steps = gr.Slider(value=4, minimum=1, maximum=32, step=1)
+        batch_size = gr.Slider(value=4, minimum=1, maximum=512, step=1)
+        gradient_accumulation_steps = gr.Slider(value=4, minimum=1, maximum=512, step=1)
         lr_scheduler_type = gr.Dropdown(
             value="cosine", choices=[scheduler.value for scheduler in SchedulerType]
         )
+        dev_ratio = gr.Slider(value=0, minimum=0, maximum=1, step=0.001)
         fp16 = gr.Checkbox(value=True)
 
     with gr.Row():
         logging_steps = gr.Slider(value=5, minimum=5, maximum=1000, step=5)
-        save_steps = gr.Slider(value=100, minimum=10, maximum=2000, step=10)
+        save_steps = gr.Slider(value=100, minimum=10, maximum=5000, step=10)
 
     with gr.Row():
         start_btn = gr.Button()
@@ -63,12 +66,15 @@ def create_sft_tab(top_elems: Dict[str, Component], runner: Runner) -> Dict[str,
             top_elems["source_prefix"],
             dataset_dir,
             dataset,
+            max_source_length,
+            max_target_length,
             learning_rate,
             num_train_epochs,
             max_samples,
             batch_size,
             gradient_accumulation_steps,
             lr_scheduler_type,
+            dev_ratio,
             fp16,
             logging_steps,
             save_steps,
@@ -89,12 +95,15 @@ def create_sft_tab(top_elems: Dict[str, Component], runner: Runner) -> Dict[str,
         preview_count=preview_count,
         preview_samples=preview_samples,
         close_btn=close_btn,
+        max_source_length=max_source_length,
+        max_target_length=max_target_length,
         learning_rate=learning_rate,
         num_train_epochs=num_train_epochs,
         max_samples=max_samples,
         batch_size=batch_size,
         gradient_accumulation_steps=gradient_accumulation_steps,
         lr_scheduler_type=lr_scheduler_type,
+        dev_ratio=dev_ratio,
         fp16=fp16,
         logging_steps=logging_steps,
         save_steps=save_steps,
diff --git a/src/llmtuner/webui/locales.py b/src/llmtuner/webui/locales.py
index 398f3c75..6800f579 100644
--- a/src/llmtuner/webui/locales.py
+++ b/src/llmtuner/webui/locales.py
@@ -129,6 +129,26 @@ LOCALES = {
             "value": "关闭"
         }
     },
+    "max_source_length": {
+        "en": {
+            "label": "Max source length",
+            "info": "Max tokens in source sequence."
+        },
+        "zh": {
+            "label": "输入序列最大长度",
+            "info": "输入序列分词后的最大长度。"
+        }
+    },
+    "max_target_length": {
+        "en": {
+            "label": "Max target length",
+            "info": "Max tokens in target sequence."
+        },
+        "zh": {
+            "label": "输出序列最大长度",
+            "info": "输出序列分词后的最大长度。"
+        }
+    },
     "learning_rate": {
         "en": {
             "label": "Learning rate",
@@ -189,6 +209,16 @@ LOCALES = {
             "info": "采用的学习率调节器名称。"
         }
     },
+    "dev_ratio": {
+        "en": {
+            "label": "Dev ratio",
+            "info": "Proportion of data in the dev set."
+        },
+        "zh": {
+            "label": "验证集比例",
+            "info": "验证集占全部样本的百分比。"
+        }
+    },
     "fp16": {
         "en": {
             "label": "fp16",
diff --git a/src/llmtuner/webui/runner.py b/src/llmtuner/webui/runner.py
index 408a46cf..98ea1c7f 100644
--- a/src/llmtuner/webui/runner.py
+++ b/src/llmtuner/webui/runner.py
@@ -69,12 +69,15 @@ class Runner:
         source_prefix: str,
         dataset_dir: str,
         dataset: List[str],
+        max_source_length: int,
+        max_target_length: int,
         learning_rate: str,
         num_train_epochs: str,
         max_samples: str,
         batch_size: int,
         gradient_accumulation_steps: int,
         lr_scheduler_type: str,
+        dev_ratio: float,
         fp16: bool,
         logging_steps: int,
         save_steps: int,
@@ -104,6 +107,8 @@ class Runner:
             source_prefix=source_prefix,
             dataset_dir=dataset_dir,
             dataset=",".join(dataset),
+            max_source_length=max_source_length,
+            max_target_length=max_target_length,
             learning_rate=float(learning_rate),
             num_train_epochs=float(num_train_epochs),
             max_samples=int(max_samples),
@@ -115,6 +120,13 @@ class Runner:
             save_steps=save_steps,
             output_dir=os.path.join(get_save_dir(model_name), finetuning_type, output_dir)
         )
+
+        if dev_ratio > 1e-6:
+            args["dev_ratio"] = dev_ratio
+            args["evaluation_strategy"] = "steps"
+            args["eval_steps"] = save_steps
+            args["load_best_model_at_end"] = True
+
         model_args, data_args, training_args, finetuning_args, _ = get_train_args(args)
 
         run_args = dict(
@@ -147,6 +159,8 @@ class Runner:
         source_prefix: str,
         dataset_dir: str,
         dataset: List[str],
+        max_source_length: int,
+        max_target_length: int,
         max_samples: str,
         batch_size: int,
         predict: bool
@@ -177,6 +191,8 @@ class Runner:
             source_prefix=source_prefix,
             dataset_dir=dataset_dir,
             dataset=",".join(dataset),
+            max_source_length=max_source_length,
+            max_target_length=max_target_length,
             max_samples=int(max_samples),
             per_device_eval_batch_size=batch_size,
             output_dir=output_dir