add new options in webui

hiyouga 2023-10-22 17:17:58 +08:00
parent b79ca8781e
commit f793ca0a2c
6 changed files with 71 additions and 17 deletions

Binary file not shown (image: 146 KiB before → 138 KiB after).

View File

@@ -3,12 +3,16 @@ from types import MethodType
from typing import TYPE_CHECKING, List, Optional
from llmtuner.extras.constants import LAYERNORM_NAMES
from llmtuner.extras.logging import get_logger
if TYPE_CHECKING:
from transformers.modeling_utils import PreTrainedModel
from llmtuner.hparams import FinetuningArguments
logger = get_logger(__name__)
def find_all_linear_modules(
model: "PreTrainedModel",
quantization_bit: Optional[int] = None,
@@ -49,6 +53,7 @@ def prepare_model_for_training(
for name, param in model.named_parameters():
if param.ndim == 1 and any(ln_name in name for ln_name in layernorm_names):
param.data = param.data.to(torch.float32)
logger.info("Upcasting layernorm weights to float32.")
if finetuning_args.neft_alpha > 1e-6:
input_embed: torch.nn.Embedding = model.get_input_embeddings()
@@ -62,6 +67,7 @@ def prepare_model_for_training(
return embeddings
input_embed.forward = MethodType(noisy_forward, input_embed)
logger.info("Using noisy embedding with alpha={:.2f}".format(finetuning_args.neft_alpha))
if use_gradient_checkpointing:
if hasattr(model, "enable_input_require_grads"):
@@ -73,6 +79,7 @@ def prepare_model_for_training(
model.gradient_checkpointing_enable()
model.config.use_cache = False # turn off when gradient checkpointing is enabled
logger.info("Gradient checkpointing enabled.")
if finetuning_args.finetuning_type != "full" and hasattr(model, output_layer_name):
output_layer: torch.nn.Linear = getattr(model, output_layer_name)

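The log lines added in these hunks sit next to the existing NEFTune logic: prepare_model_for_training patches the input embedding's forward so that training-time lookups receive uniform noise scaled by neft_alpha. A minimal, self-contained sketch of that technique; the scaling by sequence length × hidden size follows the NEFTune paper, and the helper name plus the toy usage are assumptions rather than code from this repository:

import torch
from types import MethodType

def apply_neftune(input_embed: torch.nn.Embedding, neft_alpha: float) -> None:
    # Replace the embedding's forward so training-time lookups carry NEFTune noise.
    def noisy_forward(self: torch.nn.Embedding, x: torch.Tensor) -> torch.Tensor:
        embeddings = torch.nn.Embedding.forward(self, x)
        if self.training:
            # NEFTune scales Uniform(-1, 1) noise by alpha / sqrt(seq_len * hidden_dim).
            dims = embeddings.size(1) * embeddings.size(2)
            mag_norm = neft_alpha / (dims ** 0.5)
            embeddings = embeddings + torch.zeros_like(embeddings).uniform_(-mag_norm, mag_norm)
        return embeddings
    input_embed.forward = MethodType(noisy_forward, input_embed)

# Toy usage: patch a small embedding and run one forward pass.
embed = torch.nn.Embedding(100, 16)
apply_neftune(embed, neft_alpha=5.0)
out = embed(torch.randint(0, 100, (2, 8)))  # shape (2, 8, 16); noise added while embed.training is True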
View File

@@ -31,9 +31,10 @@ def create_top() -> Dict[str, "Component"]:
with gr.Accordion(label="Model config (LLaMA only)", open=False) as llama_tab:
with gr.Row():
flash_attn = gr.Checkbox(value=False)
shift_attn = gr.Checkbox(value=False)
rope_scaling = gr.Dropdown(choices=["none", "linear", "dynamic"], value="none")
with gr.Column():
flash_attn = gr.Checkbox(value=False)
shift_attn = gr.Checkbox(value=False)
rope_scaling = gr.Radio(choices=["none", "linear", "dynamic"], value="none")
model_name.change(
list_checkpoint, [model_name, finetuning_type], [checkpoints], queue=False

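The hunk above stacks the two attention checkboxes in their own column and switches rope_scaling from a dropdown to a radio group. A rough Gradio sketch of the resulting layout; the label texts are illustrative placeholders, since the repository injects them from LOCALES:

import gradio as gr

with gr.Blocks() as demo:
    with gr.Accordion(label="Model config (LLaMA only)", open=False):
        with gr.Row():
            with gr.Column():
                flash_attn = gr.Checkbox(value=False, label="Use FlashAttention")       # placeholder label
                shift_attn = gr.Checkbox(value=False, label="Use shift short attention")  # placeholder label
            rope_scaling = gr.Radio(choices=["none", "linear", "dynamic"], value="none", label="RoPE scaling")

demo.launch()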
View File

@@ -79,26 +79,30 @@ def create_train_tab(engine: "Engine") -> Dict[str, "Component"]:
logging_steps = gr.Slider(value=5, minimum=5, maximum=1000, step=5)
save_steps = gr.Slider(value=100, minimum=10, maximum=5000, step=10)
warmup_steps = gr.Slider(value=0, minimum=0, maximum=5000, step=1)
neft_alpha = gr.Slider(value=0, minimum=0, maximum=10, step=0.1)
input_elems.update({logging_steps, save_steps, warmup_steps})
with gr.Column():
train_on_prompt = gr.Checkbox(value=False)
upcast_layernorm = gr.Checkbox(value=False)
input_elems.update({logging_steps, save_steps, warmup_steps, neft_alpha, train_on_prompt, upcast_layernorm})
elem_dict.update(dict(
advanced_tab=advanced_tab, logging_steps=logging_steps, save_steps=save_steps, warmup_steps=warmup_steps
advanced_tab=advanced_tab, logging_steps=logging_steps, save_steps=save_steps, warmup_steps=warmup_steps,
neft_alpha=neft_alpha, train_on_prompt=train_on_prompt, upcast_layernorm=upcast_layernorm
))
with gr.Accordion(label="LoRA config", open=False) as lora_tab:
with gr.Row():
lora_rank = gr.Slider(value=8, minimum=1, maximum=1024, step=1, scale=1)
lora_dropout = gr.Slider(value=0.1, minimum=0, maximum=1, step=0.01, scale=1)
lora_target = gr.Textbox(scale=2)
lora_target = gr.Textbox(scale=1)
additional_target = gr.Textbox(scale=1)
resume_lora_training = gr.Checkbox(value=True, scale=1)
input_elems.update({lora_rank, lora_dropout, lora_target, resume_lora_training})
input_elems.update({lora_rank, lora_dropout, lora_target, additional_target, resume_lora_training})
elem_dict.update(dict(
lora_tab=lora_tab,
lora_rank=lora_rank,
lora_dropout=lora_dropout,
lora_target=lora_target,
resume_lora_training=resume_lora_training,
lora_tab=lora_tab, lora_rank=lora_rank, lora_dropout=lora_dropout, lora_target=lora_target,
additional_target=additional_target, resume_lora_training=resume_lora_training,
))
with gr.Accordion(label="RLHF config", open=False) as rlhf_tab:

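Every option added in this hunk follows the same wiring: create the component, register it in input_elems so the Runner reads its value, and in elem_dict so LOCALES can attach a label and info text. A stripped-down sketch of that pattern for the three new training options, with the surrounding Blocks context simplified:

import gradio as gr

input_elems = set()  # components whose values are forwarded to the Runner
elem_dict = {}       # components that receive label/info strings from LOCALES

with gr.Blocks():
    neft_alpha = gr.Slider(value=0, minimum=0, maximum=10, step=0.1)
    train_on_prompt = gr.Checkbox(value=False)
    upcast_layernorm = gr.Checkbox(value=False)

    input_elems.update({neft_alpha, train_on_prompt, upcast_layernorm})
    elem_dict.update(dict(
        neft_alpha=neft_alpha,
        train_on_prompt=train_on_prompt,
        upcast_layernorm=upcast_layernorm,
    ))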
View File

@@ -309,6 +309,36 @@ LOCALES = {
"info": "学习率预热采用的步数。"
}
},
"neft_alpha": {
"en": {
"label": "NEFTune Alpha",
"info": "Magnitude of noise adding to embedding vectors."
},
"zh": {
"label": "NEFTune 噪声参数",
"info": "嵌入向量所添加的噪声大小。"
}
},
"train_on_prompt": {
"en": {
"label": "Train on prompt",
"info": "Compute loss on the prompt tokens in supervised fine-tuning."
},
"zh": {
"label": "计算输入损失",
"info": "在监督微调时候计算输入序列的损失。"
}
},
"upcast_layernorm": {
"en": {
"label": "Upcast LayerNorm",
"info": "Upcast weights of layernorm in float32."
},
"zh": {
"label": "缩放归一化层",
"info": "将归一化层权重缩放至 32 位浮点数。"
}
},
"lora_tab": {
"en": {
"label": "LoRA configurations"
@@ -340,11 +370,21 @@ LOCALES = {
"lora_target": {
"en": {
"label": "LoRA modules (optional)",
"info": "The name(s) of target modules to apply LoRA. Use commas to separate multiple modules."
"info": "Name(s) of target modules to apply LoRA. Use commas to separate multiple modules."
},
"zh": {
"label": "LoRA 作用层(非必填)",
"info": "应用 LoRA 的线性层名称。使用英文逗号分隔多个名称。"
"label": "LoRA 作用模块(非必填)",
"info": "应用 LoRA 的目标模块名称。使用英文逗号分隔多个名称。"
}
},
"additional_target": {
"en": {
"label": "Additional modules (optional)",
"info": "Name(s) of modules apart from LoRA layers to be set as trainable. Use commas to separate multiple modules."
},
"zh": {
"label": "附加模块(非必填)",
"info": "除 LoRA 层以外的可训练模块名称。使用英文逗号分隔多个名称。"
}
},
"resume_lora_training": {

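Each LOCALES entry maps a component key to per-language label/info strings, keyed by the same names used in elem_dict. A small sketch of how such a table can be queried; the get_text helper is hypothetical and only illustrates the lookup, with a fallback to English:

LOCALES = {
    "neft_alpha": {
        "en": {"label": "NEFTune Alpha", "info": "Magnitude of the noise added to embedding vectors."},
        "zh": {"label": "NEFTune 噪声参数", "info": "嵌入向量所添加的噪声大小。"},
    },
}

def get_text(elem_name: str, lang: str, key: str = "label") -> str:
    # Hypothetical helper: fall back to English, then to the raw element name.
    entry = LOCALES.get(elem_name, {})
    return entry.get(lang, entry.get("en", {})).get(key, elem_name)

print(get_text("neft_alpha", "zh"))           # NEFTune 噪声参数
print(get_text("neft_alpha", "en", "info"))   # Magnitude of the noise added to embedding vectors.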
View File

@@ -89,7 +89,6 @@ class Runner:
stage=TRAINING_STAGES[get("train.training_stage")],
model_name_or_path=get("top.model_path"),
do_train=True,
overwrite_cache=False,
cache_dir=user_config.get("cache_dir", None),
checkpoint_dir=checkpoint_dir,
finetuning_type=get("top.finetuning_type"),
@@ -112,9 +111,13 @@ class Runner:
logging_steps=get("train.logging_steps"),
save_steps=get("train.save_steps"),
warmup_steps=get("train.warmup_steps"),
neft_alpha=get("train.neft_alpha"),
train_on_prompt=get("train.train_on_prompt"),
upcast_layernorm=get("train.upcast_layernorm"),
lora_rank=get("train.lora_rank"),
lora_dropout=get("train.lora_dropout"),
lora_target=get("train.lora_target") or get_module(get("top.model_name")),
additional_target=get("train.additional_target"),
resume_lora_training=get("train.resume_lora_training"),
output_dir=output_dir
)
@@ -160,7 +163,6 @@ class Runner:
stage="sft",
model_name_or_path=get("top.model_path"),
do_eval=True,
overwrite_cache=False,
predict_with_generate=True,
cache_dir=user_config.get("cache_dir", None),
checkpoint_dir=checkpoint_dir,
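The Runner builds a flat keyword dictionary from the web form, now including neft_alpha, train_on_prompt, upcast_layernorm and additional_target, and hands it to the training entry point. As a hedged illustration of what that dictionary corresponds to on the command line, here is a hypothetical helper that renders such a dict as CLI flags; it is not code from the repository, and the entry-point path is assumed:

from typing import Any, Dict

def args_to_cli(args: Dict[str, Any]) -> str:
    # Illustrative only: render a flat argument dict as shell-style flags.
    parts = ["python src/train_bash.py"]  # assumed entry point
    for name, value in args.items():
        parts.append("--{} {}".format(name, value))
    return " \\\n    ".join(parts)

print(args_to_cli(dict(
    stage="sft",
    do_train=True,
    neft_alpha=5.0,
    train_on_prompt=False,
    upcast_layernorm=True,
    lora_target="q_proj,v_proj",
    additional_target="embed_tokens",
    output_dir="saves/test-run",
)))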