update ui

hiyouga 2024-07-03 23:13:49 +08:00
parent a4a1ddbcb9
commit 7f770f6895
3 changed files with 23 additions and 41 deletions

@@ -95,12 +95,11 @@ def create_train_tab(engine: "Engine") -> Dict[str, "Component"]:
with gr.Row():
with gr.Column():
resize_vocab = gr.Checkbox()
packing = gr.Checkbox()
efficient_packing = gr.Checkbox()
neat_packing = gr.Checkbox()
with gr.Column():
upcast_layernorm = gr.Checkbox()
resize_vocab = gr.Checkbox()
use_llama_pro = gr.Checkbox()
with gr.Column():
@@ -114,10 +113,9 @@ def create_train_tab(engine: "Engine") -> Dict[str, "Component"]:
warmup_steps,
neftune_alpha,
optim,
resize_vocab,
packing,
efficient_packing,
upcast_layernorm,
neat_packing,
resize_vocab,
use_llama_pro,
shift_attn,
report_to,
@@ -131,10 +129,9 @@ def create_train_tab(engine: "Engine") -> Dict[str, "Component"]:
warmup_steps=warmup_steps,
neftune_alpha=neftune_alpha,
optim=optim,
resize_vocab=resize_vocab,
packing=packing,
efficient_packing=efficient_packing,
upcast_layernorm=upcast_layernorm,
neat_packing=neat_packing,
resize_vocab=resize_vocab,
use_llama_pro=use_llama_pro,
shift_attn=shift_attn,
report_to=report_to,
@@ -331,7 +328,7 @@ def create_train_tab(engine: "Engine") -> Dict[str, "Component"]:
)
dataset.focus(list_datasets, [dataset_dir, training_stage], [dataset], queue=False)
training_stage.change(change_stage, [training_stage], [dataset, packing, efficient_packing], queue=False)
training_stage.change(change_stage, [training_stage], [dataset, packing], queue=False)
reward_model.focus(list_checkpoints, [model_name, finetuning_type], [reward_model], queue=False)
model_name.change(list_output_dirs, [model_name, finetuning_type, current_time], [output_dir], queue=False)
finetuning_type.change(list_output_dirs, [model_name, finetuning_type, current_time], [output_dir], queue=False)
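
As a rough illustration of the event binding changed in the last hunk above, here is a minimal, self-contained gradio sketch (names and the change_stage logic are hypothetical stand-ins, not the project's handler): switching the training stage now only refreshes the dataset list and the plain packing checkbox, leaving the renamed neat-packing checkbox alone.

import gradio as gr

def change_stage(stage: str):
    # Hypothetical handler: clear the dataset selection and pre-tick packing
    # only for the pre-training stage.
    return gr.update(value=[]), gr.update(value=(stage == "Pre-Training"))

with gr.Blocks() as demo:
    training_stage = gr.Dropdown(["Supervised Fine-Tuning", "Pre-Training"], value="Supervised Fine-Tuning")
    dataset = gr.Dropdown(multiselect=True)
    packing = gr.Checkbox()
    neat_packing = gr.Checkbox()
    # Same wiring pattern as the updated line above: inputs -> handler -> outputs.
    training_stage.change(change_stage, [training_stage], [dataset, packing], queue=False)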

@@ -494,20 +494,6 @@ LOCALES = {
"info": "使用的优化器adamw_torch、adamw_8bit 或 adafactor。",
},
},
"resize_vocab": {
"en": {
"label": "Resize token embeddings",
"info": "Resize the tokenizer vocab and the embedding layers.",
},
"ru": {
"label": "Изменение размера токенных эмбеддингов",
"info": "Изменить размер словаря токенизатора и слоев эмбеддинга.",
},
"zh": {
"label": "更改词表大小",
"info": "更改分词器词表和嵌入层的大小。",
},
},
"packing": {
"en": {
"label": "Pack sequences",
@@ -522,32 +508,32 @@ LOCALES = {
"info": "将序列打包为等长样本。",
},
},
"efficient_packing": {
"neat_packing": {
"en": {
"label": "Pack sequences for efficient training",
"info": "Pack sequences into samples of fixed length without cross-contamination attention for efficient training.",
"label": "Use neat packing",
"info": "Avoid cross-attention between packed sequences.",
},
"ru": {
"label": "Пакетные последовательности для эффективного обучения",
"info": "Упакуйте последовательности в образцы фиксированной длины без учета перекрестного загрязнения для эффективного обучения.",
"label": "Используйте аккуратную упаковку",
"info": "избегайте перекрестного внимания между упакованными последовательностями.",
},
"zh": {
"label": "打包序列以实现高效训练",
"info": "为了提高训练效率,将序列打包成固定长度的样本,无需注意交叉污染",
"label": "使用无污染打包",
"info": "避免打包后的序列产生交叉注意力",
},
},
"upcast_layernorm": {
"resize_vocab": {
"en": {
"label": "Upcast LayerNorm",
"info": "Upcast weights of layernorm in float32.",
"label": "Resize token embeddings",
"info": "Resize the tokenizer vocab and the embedding layers.",
},
"ru": {
"label": "Приведение весов LayerNorm",
"info": "Приведение весов LayerNorm к float32.",
"label": "Изменение размера токенных эмбеддингов",
"info": "Изменить размер словаря токенизатора и слоев эмбеддинга.",
},
"zh": {
"label": "缩放归一化层",
"info": "将归一化层权重缩放至 32 位精度",
"label": "更改词表大小",
"info": "更改分词器词表和嵌入层的大小",
},
},
"use_llama_pro": {

@@ -138,10 +138,9 @@ class Runner:
warmup_steps=get("train.warmup_steps"),
neftune_noise_alpha=get("train.neftune_alpha") or None,
optim=get("train.optim"),
packing=get("train.packing") or get("train.neat_packing"),
neat_packing=get("train.neat_packing"),
resize_vocab=get("train.resize_vocab"),
packing=get("train.packing"),
efficient_packing=get("train.efficient_packing"),
upcast_layernorm=get("train.upcast_layernorm"),
use_llama_pro=get("train.use_llama_pro"),
shift_attn=get("train.shift_attn"),
report_to="all" if get("train.report_to") else "none",
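
The last hunk is where the renamed option reaches the training arguments: either checkbox enables packing, and neat packing additionally avoids cross-attention between packed sequences. A minimal sketch of that mapping, assuming get(key) returns the current value of a web UI field (build_packing_args is a hypothetical name, not part of the project):

def build_packing_args(get) -> dict:
    # Sketch only: mirrors the two packing lines from the Runner hunk above.
    return {
        # Ticking either box turns packing on; neat packing implies plain packing.
        "packing": get("train.packing") or get("train.neat_packing"),
        # Neat packing additionally avoids cross-attention between packed sequences.
        "neat_packing": get("train.neat_packing"),
    }

values = {"train.packing": False, "train.neat_packing": True}
print(build_packing_args(values.get))  # {'packing': True, 'neat_packing': True}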