update webui
This commit is contained in:
parent
c1a53a0deb
commit
af596988b1
|
@ -4,7 +4,7 @@
|
||||||
|
|
||||||
import json
|
import json
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
from typing import Any, Dict, Literal, Sequence
|
from typing import Any, Dict, Literal, Optional, Sequence
|
||||||
|
|
||||||
import fire
|
import fire
|
||||||
import torch
|
import torch
|
||||||
|
@ -53,6 +53,7 @@ def cal_ppl(
|
||||||
dataset_dir: str = "data",
|
dataset_dir: str = "data",
|
||||||
template: str = "default",
|
template: str = "default",
|
||||||
cutoff_len: int = 1024,
|
cutoff_len: int = 1024,
|
||||||
|
max_samples: Optional[int] = None,
|
||||||
train_on_prompt: bool = False,
|
train_on_prompt: bool = False,
|
||||||
):
|
):
|
||||||
model_args, data_args, training_args, finetuning_args, _ = get_train_args(
|
model_args, data_args, training_args, finetuning_args, _ = get_train_args(
|
||||||
|
@ -63,6 +64,7 @@ def cal_ppl(
|
||||||
dataset_dir=dataset_dir,
|
dataset_dir=dataset_dir,
|
||||||
template=template,
|
template=template,
|
||||||
cutoff_len=cutoff_len,
|
cutoff_len=cutoff_len,
|
||||||
|
max_samples=max_samples,
|
||||||
train_on_prompt=train_on_prompt,
|
train_on_prompt=train_on_prompt,
|
||||||
output_dir="dummy_dir",
|
output_dir="dummy_dir",
|
||||||
overwrite_cache=True,
|
overwrite_cache=True,
|
||||||
|
|
|
@ -36,9 +36,9 @@ def create_chat_box(
|
||||||
submit_btn = gr.Button(variant="primary")
|
submit_btn = gr.Button(variant="primary")
|
||||||
|
|
||||||
with gr.Column(scale=1):
|
with gr.Column(scale=1):
|
||||||
max_new_tokens = gr.Slider(8, 4096, value=512, step=1)
|
max_new_tokens = gr.Slider(minimum=8, maximum=4096, value=512, step=1)
|
||||||
top_p = gr.Slider(0.01, 1.0, value=0.7, step=0.01)
|
top_p = gr.Slider(minimum=0.01, maximum=1.0, value=0.7, step=0.01)
|
||||||
temperature = gr.Slider(0.01, 1.5, value=0.95, step=0.01)
|
temperature = gr.Slider(minimum=0.01, maximum=1.5, value=0.95, step=0.01)
|
||||||
clear_btn = gr.Button()
|
clear_btn = gr.Button()
|
||||||
|
|
||||||
tools.input(check_json_schema, inputs=[tools, engine.manager.get_elem_by_id("top.lang")])
|
tools.input(check_json_schema, inputs=[tools, engine.manager.get_elem_by_id("top.lang")])
|
||||||
|
|
|
@ -28,18 +28,18 @@ def create_eval_tab(engine: "Engine") -> Dict[str, "Component"]:
|
||||||
elem_dict.update(dict(dataset_dir=dataset_dir, dataset=dataset, **preview_elems))
|
elem_dict.update(dict(dataset_dir=dataset_dir, dataset=dataset, **preview_elems))
|
||||||
|
|
||||||
with gr.Row():
|
with gr.Row():
|
||||||
cutoff_len = gr.Slider(value=1024, minimum=4, maximum=65536, step=1)
|
cutoff_len = gr.Slider(minimum=4, maximum=65536, value=1024, step=1)
|
||||||
max_samples = gr.Textbox(value="100000")
|
max_samples = gr.Textbox(value="100000")
|
||||||
batch_size = gr.Slider(value=2, minimum=1, maximum=1024, step=1)
|
batch_size = gr.Slider(minimum=1, maximum=1024, value=2, step=1)
|
||||||
predict = gr.Checkbox(value=True)
|
predict = gr.Checkbox(value=True)
|
||||||
|
|
||||||
input_elems.update({cutoff_len, max_samples, batch_size, predict})
|
input_elems.update({cutoff_len, max_samples, batch_size, predict})
|
||||||
elem_dict.update(dict(cutoff_len=cutoff_len, max_samples=max_samples, batch_size=batch_size, predict=predict))
|
elem_dict.update(dict(cutoff_len=cutoff_len, max_samples=max_samples, batch_size=batch_size, predict=predict))
|
||||||
|
|
||||||
with gr.Row():
|
with gr.Row():
|
||||||
max_new_tokens = gr.Slider(10, 2048, value=128, step=1)
|
max_new_tokens = gr.Slider(minimum=8, maximum=4096, value=512, step=1)
|
||||||
top_p = gr.Slider(0.01, 1, value=0.7, step=0.01)
|
top_p = gr.Slider(minimum=0.01, maximum=1, value=0.7, step=0.01)
|
||||||
temperature = gr.Slider(0.01, 1.5, value=0.95, step=0.01)
|
temperature = gr.Slider(minimum=0.01, maximum=1.5, value=0.95, step=0.01)
|
||||||
output_dir = gr.Textbox()
|
output_dir = gr.Textbox()
|
||||||
|
|
||||||
input_elems.update({max_new_tokens, top_p, temperature, output_dir})
|
input_elems.update({max_new_tokens, top_p, temperature, output_dir})
|
||||||
|
|
|
@ -85,7 +85,7 @@ def save_model(
|
||||||
|
|
||||||
def create_export_tab(engine: "Engine") -> Dict[str, "Component"]:
|
def create_export_tab(engine: "Engine") -> Dict[str, "Component"]:
|
||||||
with gr.Row():
|
with gr.Row():
|
||||||
export_size = gr.Slider(value=1, minimum=1, maximum=100, step=1)
|
export_size = gr.Slider(minimum=1, maximum=100, value=1, step=1)
|
||||||
export_quantization_bit = gr.Dropdown(choices=["none", "8", "4", "3", "2"], value="none")
|
export_quantization_bit = gr.Dropdown(choices=["none", "8", "4", "3", "2"], value="none")
|
||||||
export_quantization_dataset = gr.Textbox(value="data/c4_demo.json")
|
export_quantization_dataset = gr.Textbox(value="data/c4_demo.json")
|
||||||
export_device = gr.Radio(choices=["cpu", "cuda"], value="cpu")
|
export_device = gr.Radio(choices=["cpu", "cuda"], value="cpu")
|
||||||
|
|
|
@ -52,10 +52,10 @@ def create_train_tab(engine: "Engine") -> Dict[str, "Component"]:
|
||||||
)
|
)
|
||||||
|
|
||||||
with gr.Row():
|
with gr.Row():
|
||||||
cutoff_len = gr.Slider(value=1024, minimum=4, maximum=65536, step=1)
|
cutoff_len = gr.Slider(minimum=4, maximum=65536, value=1024, step=1)
|
||||||
batch_size = gr.Slider(value=2, minimum=1, maximum=1024, step=1)
|
batch_size = gr.Slider(minimum=1, maximum=1024, value=2, step=1)
|
||||||
gradient_accumulation_steps = gr.Slider(value=8, minimum=1, maximum=1024, step=1)
|
gradient_accumulation_steps = gr.Slider(minimum=1, maximum=1024, value=8, step=1)
|
||||||
val_size = gr.Slider(value=0, minimum=0, maximum=1, step=0.001)
|
val_size = gr.Slider(minimum=0, maximum=1, value=0, step=0.001)
|
||||||
lr_scheduler_type = gr.Dropdown(choices=[scheduler.value for scheduler in SchedulerType], value="cosine")
|
lr_scheduler_type = gr.Dropdown(choices=[scheduler.value for scheduler in SchedulerType], value="cosine")
|
||||||
|
|
||||||
input_elems.update({cutoff_len, batch_size, gradient_accumulation_steps, val_size, lr_scheduler_type})
|
input_elems.update({cutoff_len, batch_size, gradient_accumulation_steps, val_size, lr_scheduler_type})
|
||||||
|
@ -71,10 +71,10 @@ def create_train_tab(engine: "Engine") -> Dict[str, "Component"]:
|
||||||
|
|
||||||
with gr.Accordion(open=False) as extra_tab:
|
with gr.Accordion(open=False) as extra_tab:
|
||||||
with gr.Row():
|
with gr.Row():
|
||||||
logging_steps = gr.Slider(value=5, minimum=5, maximum=1000, step=5)
|
logging_steps = gr.Slider(minimum=1, maximum=1000, value=5, step=5)
|
||||||
save_steps = gr.Slider(value=100, minimum=10, maximum=5000, step=10)
|
save_steps = gr.Slider(minimum=10, maximum=5000, value=100, step=10)
|
||||||
warmup_steps = gr.Slider(value=0, minimum=0, maximum=5000, step=1)
|
warmup_steps = gr.Slider(minimum=0, maximum=5000, value=0, step=1)
|
||||||
neftune_alpha = gr.Slider(value=0, minimum=0, maximum=10, step=0.1)
|
neftune_alpha = gr.Slider(minimum=0, maximum=10, value=0, step=0.1)
|
||||||
optim = gr.Textbox(value="adamw_torch")
|
optim = gr.Textbox(value="adamw_torch")
|
||||||
|
|
||||||
with gr.Row():
|
with gr.Row():
|
||||||
|
@ -124,7 +124,7 @@ def create_train_tab(engine: "Engine") -> Dict[str, "Component"]:
|
||||||
|
|
||||||
with gr.Accordion(open=False) as freeze_tab:
|
with gr.Accordion(open=False) as freeze_tab:
|
||||||
with gr.Row():
|
with gr.Row():
|
||||||
num_layer_trainable = gr.Slider(value=3, minimum=1, maximum=128, step=1)
|
num_layer_trainable = gr.Slider(minimum=1, maximum=128, value=2, step=1)
|
||||||
name_module_trainable = gr.Textbox(value="all")
|
name_module_trainable = gr.Textbox(value="all")
|
||||||
|
|
||||||
input_elems.update({num_layer_trainable, name_module_trainable})
|
input_elems.update({num_layer_trainable, name_module_trainable})
|
||||||
|
@ -136,10 +136,10 @@ def create_train_tab(engine: "Engine") -> Dict[str, "Component"]:
|
||||||
|
|
||||||
with gr.Accordion(open=False) as lora_tab:
|
with gr.Accordion(open=False) as lora_tab:
|
||||||
with gr.Row():
|
with gr.Row():
|
||||||
lora_rank = gr.Slider(value=8, minimum=1, maximum=1024, step=1)
|
lora_rank = gr.Slider(minimum=1, maximum=1024, value=8, step=1)
|
||||||
lora_alpha = gr.Slider(value=16, minimum=1, maximum=2048, step=1)
|
lora_alpha = gr.Slider(minimum=1, maximum=2048, value=16, step=1)
|
||||||
lora_dropout = gr.Slider(value=0, minimum=0, maximum=1, step=0.01)
|
lora_dropout = gr.Slider(minimum=0, maximum=1, value=0, step=0.01)
|
||||||
loraplus_lr_ratio = gr.Slider(value=0, minimum=0, maximum=64, step=0.01)
|
loraplus_lr_ratio = gr.Slider(minimum=0, maximum=64, value=0, step=0.01)
|
||||||
create_new_adapter = gr.Checkbox()
|
create_new_adapter = gr.Checkbox()
|
||||||
|
|
||||||
with gr.Row():
|
with gr.Row():
|
||||||
|
@ -180,9 +180,9 @@ def create_train_tab(engine: "Engine") -> Dict[str, "Component"]:
|
||||||
|
|
||||||
with gr.Accordion(open=False) as rlhf_tab:
|
with gr.Accordion(open=False) as rlhf_tab:
|
||||||
with gr.Row():
|
with gr.Row():
|
||||||
dpo_beta = gr.Slider(value=0.1, minimum=0, maximum=1, step=0.01)
|
dpo_beta = gr.Slider(minimum=0, maximum=1, value=0.1, step=0.01)
|
||||||
dpo_ftx = gr.Slider(value=0, minimum=0, maximum=10, step=0.01)
|
dpo_ftx = gr.Slider(minimum=0, maximum=10, value=0, step=0.01)
|
||||||
orpo_beta = gr.Slider(value=0.1, minimum=0, maximum=1, step=0.01)
|
orpo_beta = gr.Slider(minimum=0, maximum=1, value=0.1, step=0.01)
|
||||||
reward_model = gr.Dropdown(multiselect=True, allow_custom_value=True)
|
reward_model = gr.Dropdown(multiselect=True, allow_custom_value=True)
|
||||||
|
|
||||||
input_elems.update({dpo_beta, dpo_ftx, orpo_beta, reward_model})
|
input_elems.update({dpo_beta, dpo_ftx, orpo_beta, reward_model})
|
||||||
|
@ -193,9 +193,9 @@ def create_train_tab(engine: "Engine") -> Dict[str, "Component"]:
|
||||||
with gr.Accordion(open=False) as galore_tab:
|
with gr.Accordion(open=False) as galore_tab:
|
||||||
with gr.Row():
|
with gr.Row():
|
||||||
use_galore = gr.Checkbox()
|
use_galore = gr.Checkbox()
|
||||||
galore_rank = gr.Slider(value=16, minimum=1, maximum=1024, step=1)
|
galore_rank = gr.Slider(minimum=1, maximum=1024, value=16, step=1)
|
||||||
galore_update_interval = gr.Slider(value=200, minimum=1, maximum=1024, step=1)
|
galore_update_interval = gr.Slider(minimum=1, maximum=1024, value=200, step=1)
|
||||||
galore_scale = gr.Slider(value=0.25, minimum=0, maximum=1, step=0.01)
|
galore_scale = gr.Slider(minimum=0, maximum=1, value=0.25, step=0.01)
|
||||||
galore_target = gr.Textbox(value="all")
|
galore_target = gr.Textbox(value="all")
|
||||||
|
|
||||||
input_elems.update({use_galore, galore_rank, galore_update_interval, galore_scale, galore_target})
|
input_elems.update({use_galore, galore_rank, galore_update_interval, galore_scale, galore_target})
|
||||||
|
@ -215,8 +215,8 @@ def create_train_tab(engine: "Engine") -> Dict[str, "Component"]:
|
||||||
use_badam = gr.Checkbox()
|
use_badam = gr.Checkbox()
|
||||||
badam_mode = gr.Dropdown(choices=["layer", "ratio"], value="layer")
|
badam_mode = gr.Dropdown(choices=["layer", "ratio"], value="layer")
|
||||||
badam_switch_mode = gr.Dropdown(choices=["ascending", "descending", "random", "fixed"], value="ascending")
|
badam_switch_mode = gr.Dropdown(choices=["ascending", "descending", "random", "fixed"], value="ascending")
|
||||||
badam_switch_interval = gr.Slider(value=50, minimum=1, maximum=1024, step=1)
|
badam_switch_interval = gr.Slider(minimum=1, maximum=1024, value=50, step=1)
|
||||||
badam_update_ratio = gr.Slider(value=0.05, minimum=0, maximum=1, step=0.01)
|
badam_update_ratio = gr.Slider(minimum=0, maximum=1, value=0.05, step=0.01)
|
||||||
|
|
||||||
input_elems.update({use_badam, badam_mode, badam_switch_mode, badam_switch_interval, badam_update_ratio})
|
input_elems.update({use_badam, badam_mode, badam_switch_mode, badam_switch_interval, badam_update_ratio})
|
||||||
elem_dict.update(
|
elem_dict.update(
|
||||||
|
|
Loading…
Reference in New Issue