update ui

commit 7f770f6895 (parent a4a1ddbcb9)

Renames the efficient_packing option to neat_packing across the training tab,
its locale strings, and the Runner; moves the resize_vocab checkbox next to
use_llama_pro; and removes the upcast_layernorm toggle from the web UI.
@@ -95,12 +95,11 @@ def create_train_tab(engine: "Engine") -> Dict[str, "Component"]:
         with gr.Row():
             with gr.Column():
-                resize_vocab = gr.Checkbox()
                 packing = gr.Checkbox()
-                efficient_packing = gr.Checkbox()
+                neat_packing = gr.Checkbox()
 
             with gr.Column():
-                upcast_layernorm = gr.Checkbox()
+                resize_vocab = gr.Checkbox()
                 use_llama_pro = gr.Checkbox()
 
             with gr.Column():
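This hunk folds the old efficient_packing toggle into a neat_packing checkbox placed directly under packing, and replaces the upcast_layernorm checkbox with resize_vocab in the second column. A minimal, self-contained Gradio sketch of the relationship the rename suggests (illustrative names, not this repo's code): neat packing only makes sense when packing is in effect, so a UI can keep the two checkboxes consistent.

import gradio as gr

def _sync(neat: bool, packing_on: bool) -> bool:
    # If neat packing is requested, the base packing option must be on too.
    return packing_on or neat

with gr.Blocks() as demo:
    packing = gr.Checkbox(label="Pack sequences")
    neat_packing = gr.Checkbox(label="Use neat packing")
    # Re-check "packing" whenever "neat packing" is switched on.
    neat_packing.change(_sync, inputs=[neat_packing, packing], outputs=[packing])

if __name__ == "__main__":
    demo.launch()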
@@ -114,10 +113,9 @@ def create_train_tab(engine: "Engine") -> Dict[str, "Component"]:
             warmup_steps,
             neftune_alpha,
             optim,
-            resize_vocab,
             packing,
-            efficient_packing,
-            upcast_layernorm,
+            neat_packing,
+            resize_vocab,
             use_llama_pro,
             shift_attn,
             report_to,
@@ -131,10 +129,9 @@ def create_train_tab(engine: "Engine") -> Dict[str, "Component"]:
             warmup_steps=warmup_steps,
             neftune_alpha=neftune_alpha,
             optim=optim,
-            resize_vocab=resize_vocab,
             packing=packing,
-            efficient_packing=efficient_packing,
-            upcast_layernorm=upcast_layernorm,
+            neat_packing=neat_packing,
+            resize_vocab=resize_vocab,
             use_llama_pro=use_llama_pro,
             shift_attn=shift_attn,
             report_to=report_to,
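The two hunks above make the same rename in the two registries that every train-tab component joins: the set of input elements whose values the runner reads, and the name-to-component mapping used to apply localized labels. A hedged sketch of that pattern (illustrative structures, not the repo's exact code):

import gradio as gr

input_elems: set = set()   # components whose values are collected at launch
elem_dict: dict = {}       # name -> component, used to apply LOCALES strings

with gr.Blocks():
    packing = gr.Checkbox()
    neat_packing = gr.Checkbox()

# A renamed checkbox has to be updated in both places, in lockstep.
input_elems.update({packing, neat_packing})
elem_dict.update(dict(packing=packing, neat_packing=neat_packing))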
@@ -331,7 +328,7 @@ def create_train_tab(engine: "Engine") -> Dict[str, "Component"]:
     )
 
     dataset.focus(list_datasets, [dataset_dir, training_stage], [dataset], queue=False)
-    training_stage.change(change_stage, [training_stage], [dataset, packing, efficient_packing], queue=False)
+    training_stage.change(change_stage, [training_stage], [dataset, packing], queue=False)
     reward_model.focus(list_checkpoints, [model_name, finetuning_type], [reward_model], queue=False)
     model_name.change(list_output_dirs, [model_name, finetuning_type, current_time], [output_dir], queue=False)
     finetuning_type.change(list_output_dirs, [model_name, finetuning_type, current_time], [output_dir], queue=False)
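With the extra checkbox gone, change_stage only needs to drive the dataset list and the generic packing toggle. A hypothetical sketch of such a callback (the real signature and stage names may differ):

from typing import List, Tuple

def change_stage(training_stage: str) -> Tuple[List[str], bool]:
    # Reset the dataset selection and default packing to on only for
    # pre-training (an assumption for illustration, not the repo's rule).
    return [], training_stage == "Pre-Training"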
@@ -494,20 +494,6 @@ LOCALES = {
             "info": "使用的优化器:adamw_torch、adamw_8bit 或 adafactor。",
         },
     },
-    "resize_vocab": {
-        "en": {
-            "label": "Resize token embeddings",
-            "info": "Resize the tokenizer vocab and the embedding layers.",
-        },
-        "ru": {
-            "label": "Изменение размера токенных эмбеддингов",
-            "info": "Изменить размер словаря токенизатора и слоев эмбеддинга.",
-        },
-        "zh": {
-            "label": "更改词表大小",
-            "info": "更改分词器词表和嵌入层的大小。",
-        },
-    },
     "packing": {
         "en": {
             "label": "Pack sequences",
@@ -522,32 +508,32 @@ LOCALES = {
             "info": "将序列打包为等长样本。",
         },
     },
-    "efficient_packing": {
+    "neat_packing": {
         "en": {
-            "label": "Pack sequences for efficient training",
-            "info": "Pack sequences into samples of fixed length without cross-contamination attention for efficient training.",
+            "label": "Use neat packing",
+            "info": "Avoid cross-attention between packed sequences.",
         },
         "ru": {
-            "label": "Пакетные последовательности для эффективного обучения",
-            "info": "Упакуйте последовательности в образцы фиксированной длины без учета перекрестного загрязнения для эффективного обучения.",
+            "label": "Используйте аккуратную упаковку",
+            "info": "Избегайте перекрестного внимания между упакованными последовательностями.",
         },
         "zh": {
-            "label": "打包序列以实现高效训练",
-            "info": "为了提高训练效率,将序列打包成固定长度的样本,无需注意交叉污染。",
+            "label": "使用无污染打包",
+            "info": "避免打包后的序列产生交叉注意力。",
         },
     },
-    "upcast_layernorm": {
+    "resize_vocab": {
         "en": {
-            "label": "Upcast LayerNorm",
-            "info": "Upcast weights of layernorm in float32.",
+            "label": "Resize token embeddings",
+            "info": "Resize the tokenizer vocab and the embedding layers.",
        },
         "ru": {
-            "label": "Приведение весов LayerNorm",
-            "info": "Приведение весов LayerNorm к float32.",
+            "label": "Изменение размера токенных эмбеддингов",
+            "info": "Изменить размер словаря токенизатора и слоев эмбеддинга.",
         },
         "zh": {
-            "label": "缩放归一化层",
-            "info": "将归一化层权重缩放至 32 位精度。",
+            "label": "更改词表大小",
+            "info": "更改分词器词表和嵌入层的大小。",
         },
     },
     "use_llama_pro": {
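The new locale strings pin down what neat packing means: sequences packed into one sample must not attend across their boundaries. A minimal NumPy sketch of that idea (an assumed illustration, not this repo's implementation), building a block-diagonal causal mask for a packed sample:

import numpy as np

def neat_packing_mask(seq_lens):
    """Causal attention mask with no cross-attention between packed sequences."""
    total = sum(seq_lens)
    mask = np.zeros((total, total), dtype=bool)
    start = 0
    for n in seq_lens:
        # Each packed segment gets its own lower-triangular (causal) block;
        # everything outside the blocks stays False, so tokens from one
        # sequence never attend to tokens from another.
        mask[start:start + n, start:start + n] = np.tril(np.ones((n, n), dtype=bool))
        start += n
    return mask

# Two sequences of lengths 3 and 2 packed into one 5-token sample:
print(neat_packing_mask([3, 2]).astype(int))

Plain packing would instead use one full causal mask over all 5 tokens, letting the second sequence attend to the first (the "cross-contamination" the old strings mentioned).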
@@ -138,10 +138,9 @@ class Runner:
             warmup_steps=get("train.warmup_steps"),
             neftune_noise_alpha=get("train.neftune_alpha") or None,
             optim=get("train.optim"),
+            packing=get("train.packing") or get("train.neat_packing"),
+            neat_packing=get("train.neat_packing"),
             resize_vocab=get("train.resize_vocab"),
-            packing=get("train.packing"),
-            efficient_packing=get("train.efficient_packing"),
-            upcast_layernorm=get("train.upcast_layernorm"),
             use_llama_pro=get("train.use_llama_pro"),
             shift_attn=get("train.shift_attn"),
             report_to="all" if get("train.report_to") else "none",
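The Runner hunk encodes the implication directly: packing is read from either checkbox, so checking neat packing alone still enables packing, and the dropped efficient_packing and upcast_layernorm reads disappear along with their checkboxes. A tiny self-contained check of that fallback (hypothetical values, standing in for the UI getter):

# Stand-in for the UI value getter; the keys mirror the hunk above.
ui_values = {"train.packing": False, "train.neat_packing": True}
get = ui_values.get

args = dict(
    packing=get("train.packing") or get("train.neat_packing"),
    neat_packing=get("train.neat_packing"),
)
assert args["packing"] is True  # neat packing implies packing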