update ui

hiyouga 2024-07-03 23:13:49 +08:00
parent a4a1ddbcb9
commit 7f770f6895
3 changed files with 23 additions and 41 deletions

View File

@@ -95,12 +95,11 @@ def create_train_tab(engine: "Engine") -> Dict[str, "Component"]:
     with gr.Row():
         with gr.Column():
-            resize_vocab = gr.Checkbox()
             packing = gr.Checkbox()
-            efficient_packing = gr.Checkbox()
+            neat_packing = gr.Checkbox()
         with gr.Column():
-            upcast_layernorm = gr.Checkbox()
+            resize_vocab = gr.Checkbox()
             use_llama_pro = gr.Checkbox()
         with gr.Column():
@@ -114,10 +113,9 @@ def create_train_tab(engine: "Engine") -> Dict[str, "Component"]:
     warmup_steps,
     neftune_alpha,
     optim,
-    resize_vocab,
     packing,
-    efficient_packing,
-    upcast_layernorm,
+    neat_packing,
+    resize_vocab,
     use_llama_pro,
     shift_attn,
     report_to,
@@ -131,10 +129,9 @@ def create_train_tab(engine: "Engine") -> Dict[str, "Component"]:
     warmup_steps=warmup_steps,
     neftune_alpha=neftune_alpha,
     optim=optim,
-    resize_vocab=resize_vocab,
     packing=packing,
-    efficient_packing=efficient_packing,
-    upcast_layernorm=upcast_layernorm,
+    neat_packing=neat_packing,
+    resize_vocab=resize_vocab,
     use_llama_pro=use_llama_pro,
     shift_attn=shift_attn,
     report_to=report_to,
@@ -331,7 +328,7 @@ def create_train_tab(engine: "Engine") -> Dict[str, "Component"]:
     )
     dataset.focus(list_datasets, [dataset_dir, training_stage], [dataset], queue=False)
-    training_stage.change(change_stage, [training_stage], [dataset, packing, efficient_packing], queue=False)
+    training_stage.change(change_stage, [training_stage], [dataset, packing], queue=False)
     reward_model.focus(list_checkpoints, [model_name, finetuning_type], [reward_model], queue=False)
     model_name.change(list_output_dirs, [model_name, finetuning_type, current_time], [output_dir], queue=False)
     finetuning_type.change(list_output_dirs, [model_name, finetuning_type, current_time], [output_dir], queue=False)
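
The training_stage change handler above now only refreshes the dataset list and the plain packing checkbox. Below is a minimal, self-contained sketch of how this kind of Gradio wiring behaves; the change_stage callback body, stage names, and dataset choices are illustrative placeholders, not the project's actual implementation.

import gradio as gr

def change_stage(training_stage: str):
    # Return new values for the registered outputs: [dataset, packing].
    is_pretrain = "Pre-Training" in training_stage  # illustrative stage check
    choices = ["c4_demo"] if is_pretrain else ["alpaca_en_demo"]  # placeholder dataset names
    return gr.Dropdown(choices=choices, value=[]), is_pretrain

with gr.Blocks() as demo:
    training_stage = gr.Dropdown(["Supervised Fine-Tuning", "Pre-Training"], value="Supervised Fine-Tuning")
    dataset = gr.Dropdown(multiselect=True)
    packing = gr.Checkbox(label="Pack sequences")
    # Changing the stage refreshes the dataset choices and the packing default.
    training_stage.change(change_stage, [training_stage], [dataset, packing], queue=False)

demo.launch()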

View File

@@ -494,20 +494,6 @@ LOCALES = {
             "info": "使用的优化器：adamw_torch、adamw_8bit 或 adafactor。",
         },
     },
-    "resize_vocab": {
-        "en": {
-            "label": "Resize token embeddings",
-            "info": "Resize the tokenizer vocab and the embedding layers.",
-        },
-        "ru": {
-            "label": "Изменение размера токенных эмбеддингов",
-            "info": "Изменить размер словаря токенизатора и слоев эмбеддинга.",
-        },
-        "zh": {
-            "label": "更改词表大小",
-            "info": "更改分词器词表和嵌入层的大小。",
-        },
-    },
     "packing": {
         "en": {
             "label": "Pack sequences",
@@ -522,32 +508,32 @@ LOCALES = {
             "info": "将序列打包为等长样本。",
         },
     },
-    "efficient_packing": {
+    "neat_packing": {
         "en": {
-            "label": "Pack sequences for efficient training",
-            "info": "Pack sequences into samples of fixed length without cross-contamination attention for efficient training.",
+            "label": "Use neat packing",
+            "info": "Avoid cross-attention between packed sequences.",
         },
         "ru": {
-            "label": "Пакетные последовательности для эффективного обучения",
-            "info": "Упакуйте последовательности в образцы фиксированной длины без учета перекрестного загрязнения для эффективного обучения.",
+            "label": "Используйте аккуратную упаковку",
+            "info": "избегайте перекрестного внимания между упакованными последовательностями.",
         },
         "zh": {
-            "label": "打包序列以实现高效训练",
-            "info": "为了提高训练效率，将序列打包成固定长度的样本，无需注意交叉污染。",
+            "label": "使用无污染打包",
+            "info": "避免打包后的序列产生交叉注意力。",
         },
     },
-    "upcast_layernorm": {
+    "resize_vocab": {
         "en": {
-            "label": "Upcast LayerNorm",
-            "info": "Upcast weights of layernorm in float32.",
+            "label": "Resize token embeddings",
+            "info": "Resize the tokenizer vocab and the embedding layers.",
         },
         "ru": {
-            "label": "Приведение весов LayerNorm",
-            "info": "Приведение весов LayerNorm к float32.",
+            "label": "Изменение размера токенных эмбеддингов",
+            "info": "Изменить размер словаря токенизатора и слоев эмбеддинга.",
         },
         "zh": {
-            "label": "缩放归一化层",
-            "info": "将归一化层权重缩放至 32 位精度。",
+            "label": "更改词表大小",
+            "info": "更改分词器词表和嵌入层的大小。",
         },
     },
     "use_llama_pro": {

View File

@@ -138,10 +138,9 @@ class Runner:
     warmup_steps=get("train.warmup_steps"),
     neftune_noise_alpha=get("train.neftune_alpha") or None,
     optim=get("train.optim"),
+    packing=get("train.packing") or get("train.neat_packing"),
+    neat_packing=get("train.neat_packing"),
     resize_vocab=get("train.resize_vocab"),
-    packing=get("train.packing"),
-    efficient_packing=get("train.efficient_packing"),
-    upcast_layernorm=get("train.upcast_layernorm"),
     use_llama_pro=get("train.use_llama_pro"),
     shift_attn=get("train.shift_attn"),
     report_to="all" if get("train.report_to") else "none",