update UI, fix #212

commit 4d1641c1bf
parent c3fcb67486
@@ -20,7 +20,7 @@ class ChatModel:
     ) -> None:
         self.model, self.tokenizer = load_model_and_tokenizer(model_args, finetuning_args)
         self.template = get_template(data_args.prompt_template)
-        self.source_prefix = data_args.source_prefix if data_args.source_prefix else ""
+        self.source_prefix = data_args.source_prefix or ""
         self.generating_args = generating_args
 
     def process_args(
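Note: the rewritten assignment relies on Python's `or` returning its first truthy operand, so it handles both `None` and the empty string in one expression. A minimal sketch of the equivalence:

    # `value or ""` coalesces falsy prefixes (None, "") to "",
    # matching the longer conditional form it replaces.
    for value in (None, "", "You are a helpful assistant."):
        assert (value if value else "") == (value or "")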
@@ -36,7 +36,7 @@ SUPPORTED_MODELS = {
     "InternLM-7B-Chat": "internlm/internlm-chat-7b"
 }
 
-DEFAULT_MODULE = { # will be deprecated
+DEFAULT_MODULE = {
     "LLaMA": "q_proj,v_proj",
     "LLaMA2": "q_proj,v_proj",
     "BLOOM": "query_key_value",
@@ -215,7 +215,7 @@ Supports: https://huggingface.co/baichuan-inc/Baichuan-13B-Chat
 register_template(
     name="baichuan",
     prefix="",
-    prompt=" <reserved_102> {query} <reserved_103> ",
+    prompt="<reserved_102>{query}<reserved_103>",
     sep="</s>",
     use_history=True
 )
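Note: the new prompt drops the padding spaces around Baichuan's reserved role tokens. With BPE-style tokenizers a stray space typically merges into the adjacent piece, so the two layouts can encode to different token sequences; the trimmed form matches how the reserved tokens are meant to appear. A sketch to compare the encodings (model id taken from the hunk header above; the printed ids are illustrative, not asserted):

    from transformers import AutoTokenizer

    tok = AutoTokenizer.from_pretrained(
        "baichuan-inc/Baichuan-13B-Chat", trust_remote_code=True
    )
    old = " <reserved_102> {query} <reserved_103> ".format(query="hi")
    new = "<reserved_102>{query}<reserved_103>".format(query="hi")
    # The extra whitespace usually yields extra or different pieces.
    print(tok.encode(old))
    print(tok.encode(new))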
@@ -73,6 +73,7 @@ class WebChatModel(ChatModel):
         chatbot: List[Tuple[str, str]],
         query: str,
         history: List[Tuple[str, str]],
+        prefix: str,
         max_new_tokens: int,
         top_p: float,
         temperature: float
@@ -80,7 +81,7 @@ class WebChatModel(ChatModel):
         chatbot.append([query, ""])
         response = ""
         for new_text in self.stream_chat(
-            query, history, max_new_tokens=max_new_tokens, top_p=top_p, temperature=temperature
+            query, history, prefix, max_new_tokens=max_new_tokens, top_p=top_p, temperature=temperature
         ):
             response += new_text
             new_history = history + [(query, response)]
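Note: `predict` now threads the UI-supplied `prefix` positionally into `stream_chat`, so the system prompt typed in the chat box takes effect per request. A sketch of driving the streaming call directly (assumes a loaded model; argument values are placeholders):

    chat_model = WebChatModel()
    history, response = [], ""
    for new_text in chat_model.stream_chat(
        "How are you?", history, "You are a helpful assistant.",
        max_new_tokens=128, top_p=0.7, temperature=0.95
    ):
        response += new_text  # accumulate the streamed delta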
@ -16,11 +16,9 @@ def create_chat_box(
|
||||||
|
|
||||||
with gr.Row():
|
with gr.Row():
|
||||||
with gr.Column(scale=4):
|
with gr.Column(scale=4):
|
||||||
with gr.Column(scale=12):
|
prefix = gr.Textbox(show_label=False)
|
||||||
query = gr.Textbox(show_label=False, lines=8)
|
query = gr.Textbox(show_label=False, lines=8)
|
||||||
|
submit_btn = gr.Button(variant="primary")
|
||||||
with gr.Column(min_width=32, scale=1):
|
|
||||||
submit_btn = gr.Button(variant="primary")
|
|
||||||
|
|
||||||
with gr.Column(scale=1):
|
with gr.Column(scale=1):
|
||||||
clear_btn = gr.Button()
|
clear_btn = gr.Button()
|
||||||
|
@ -36,7 +34,7 @@ def create_chat_box(
|
||||||
|
|
||||||
submit_btn.click(
|
submit_btn.click(
|
||||||
chat_model.predict,
|
chat_model.predict,
|
||||||
[chatbot, query, history, max_new_tokens, top_p, temperature],
|
[chatbot, query, history, prefix, max_new_tokens, top_p, temperature],
|
||||||
[chatbot, history],
|
[chatbot, history],
|
||||||
show_progress=True
|
show_progress=True
|
||||||
).then(
|
).then(
|
||||||
|
@ -46,6 +44,7 @@ def create_chat_box(
|
||||||
clear_btn.click(lambda: ([], []), outputs=[chatbot, history], show_progress=True)
|
clear_btn.click(lambda: ([], []), outputs=[chatbot, history], show_progress=True)
|
||||||
|
|
||||||
return chat_box, chatbot, history, dict(
|
return chat_box, chatbot, history, dict(
|
||||||
|
prefix=prefix,
|
||||||
query=query,
|
query=query,
|
||||||
submit_btn=submit_btn,
|
submit_btn=submit_btn,
|
||||||
clear_btn=clear_btn,
|
clear_btn=clear_btn,
|
||||||
|
|
|
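Note: the keys of the returned dict (`prefix`, `query`, `submit_btn`, `clear_btn`) match the entries added to `LOCALES` further down, which is presumably how each component picks up its localized label or placeholder. A sketch of that lookup, with `apply_locale` as a hypothetical helper name:

    def apply_locale(elems: dict, lang: str) -> None:
        # Hypothetical: push localized attributes onto each component.
        for name, component in elems.items():
            for attr, text in LOCALES[name][lang].items():
                setattr(component, attr, text)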
@@ -31,7 +31,8 @@ def create_eval_tab(top_elems: Dict[str, Component], runner: Runner) -> Dict[str
             start_btn = gr.Button()
             stop_btn = gr.Button()
 
-    output_box = gr.Markdown()
+    with gr.Box():
+        output_box = gr.Markdown()
 
     start_btn.click(
         runner.run_eval,
@@ -12,7 +12,7 @@ def create_infer_tab(top_elems: Dict[str, Component]) -> Dict[str, Component]:
             load_btn = gr.Button()
             unload_btn = gr.Button()
 
-    info_box = gr.Markdown()
+    info_box = gr.Textbox(show_label=False, interactive=False)
 
     chat_model = WebChatModel()
     chat_box, chatbot, history, chat_elems = create_chat_box(chat_model)
@@ -35,12 +35,21 @@ def create_sft_tab(top_elems: Dict[str, Component], runner: Runner) -> Dict[str,
         lr_scheduler_type = gr.Dropdown(
             value="cosine", choices=[scheduler.value for scheduler in SchedulerType]
         )
+        max_grad_norm = gr.Textbox(value="1.0")
         dev_ratio = gr.Slider(value=0, minimum=0, maximum=1, step=0.001)
-        fp16 = gr.Checkbox(value=True)
 
-    with gr.Row():
-        logging_steps = gr.Slider(value=5, minimum=5, maximum=1000, step=5)
-        save_steps = gr.Slider(value=100, minimum=10, maximum=5000, step=10)
+    with gr.Accordion(label="Advanced config", open=False) as advanced_tab:
+        with gr.Row():
+            logging_steps = gr.Slider(value=5, minimum=5, maximum=1000, step=5)
+            save_steps = gr.Slider(value=100, minimum=10, maximum=5000, step=10)
+            warmup_steps = gr.Slider(value=0, minimum=0, maximum=5000, step=1)
+            compute_type = gr.Radio(choices=["fp16", "bf16"], value="fp16")
+
+    with gr.Accordion(label="LoRA config", open=False) as lora_tab:
+        with gr.Row():
+            lora_rank = gr.Slider(value=8, minimum=1, maximum=1024, step=1, scale=1)
+            lora_dropout = gr.Slider(value=0, minimum=0, maximum=1, step=0.01, scale=1)
+            lora_target = gr.Textbox(scale=2)
 
     with gr.Row():
         start_btn = gr.Button()
@@ -49,7 +58,9 @@ def create_sft_tab(top_elems: Dict[str, Component], runner: Runner) -> Dict[str,
     with gr.Row():
         with gr.Column(scale=4):
             output_dir = gr.Textbox(interactive=True)
-            output_box = gr.Markdown()
+
+            with gr.Box():
+                output_box = gr.Markdown()
 
         with gr.Column(scale=1):
             loss_viewer = gr.Plot()
@@ -74,10 +85,15 @@ def create_sft_tab(top_elems: Dict[str, Component], runner: Runner) -> Dict[str,
             batch_size,
             gradient_accumulation_steps,
             lr_scheduler_type,
+            max_grad_norm,
             dev_ratio,
-            fp16,
             logging_steps,
             save_steps,
+            warmup_steps,
+            compute_type,
+            lora_rank,
+            lora_dropout,
+            lora_target,
             output_dir
         ],
         [output_box]
@@ -103,10 +119,17 @@ def create_sft_tab(top_elems: Dict[str, Component], runner: Runner) -> Dict[str,
         batch_size=batch_size,
         gradient_accumulation_steps=gradient_accumulation_steps,
         lr_scheduler_type=lr_scheduler_type,
+        max_grad_norm=max_grad_norm,
         dev_ratio=dev_ratio,
-        fp16=fp16,
+        advanced_tab=advanced_tab,
         logging_steps=logging_steps,
         save_steps=save_steps,
+        warmup_steps=warmup_steps,
+        compute_type=compute_type,
+        lora_tab=lora_tab,
+        lora_rank=lora_rank,
+        lora_dropout=lora_dropout,
+        lora_target=lora_target,
         start_btn=start_btn,
         stop_btn=stop_btn,
         output_dir=output_dir,
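Note: `max_grad_norm` is collected from a `gr.Textbox`, so it reaches the runner as a string; the runner hunk below casts it with `float(max_grad_norm)`. A defensive sketch of that conversion (the fallback behavior is an assumption, not part of the commit):

    def parse_max_grad_norm(raw: str, default: float = 1.0) -> float:
        # Textbox values arrive as strings; fall back to the UI default
        # when the field is empty or not numeric.
        try:
            return float(raw)
        except (TypeError, ValueError):
            return default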
@@ -22,10 +22,11 @@ def create_top() -> Dict[str, Component]:
         checkpoints = gr.Dropdown(multiselect=True, scale=5)
         refresh_btn = gr.Button(scale=1)
 
-    with gr.Row():
-        quantization_bit = gr.Dropdown([8, 4], scale=1)
-        template = gr.Dropdown(value="default", choices=list(templates.keys()), scale=2)
-        source_prefix = gr.Textbox(scale=4)
+    with gr.Accordion(label="Advanced config", open=False) as advanced_tab:
+        with gr.Row():
+            quantization_bit = gr.Dropdown([8, 4], scale=1)
+            template = gr.Dropdown(value="default", choices=list(templates.keys()), scale=1)
+            source_prefix = gr.Textbox(scale=2)
 
     model_name.change(
         list_checkpoint, [model_name, finetuning_type], [checkpoints]
@@ -47,9 +48,10 @@ def create_top() -> Dict[str, Component]:
         model_name=model_name,
         model_path=model_path,
         finetuning_type=finetuning_type,
-        template=template,
         checkpoints=checkpoints,
         refresh_btn=refresh_btn,
+        advanced_tab=advanced_tab,
         quantization_bit=quantization_bit,
+        template=template,
         source_prefix=source_prefix
     )
@@ -27,7 +27,7 @@ def create_ui() -> gr.Blocks:
         with gr.Tab("Evaluate"):
             eval_elems = create_eval_tab(top_elems, runner)
 
-        with gr.Tab("Inference"):
+        with gr.Tab("Chat"):
             infer_elems = create_infer_tab(top_elems)
 
         elem_list = [top_elems, sft_elems, eval_elems, infer_elems]
@@ -49,6 +49,14 @@ LOCALES = {
             "value": "刷新断点"
         }
     },
+    "advanced_tab": {
+        "en": {
+            "label": "Advanced configurations"
+        },
+        "zh": {
+            "label": "高级设置"
+        }
+    },
     "quantization_bit": {
         "en": {
             "label": "Quantization bit (optional)",
@@ -71,12 +79,12 @@ LOCALES = {
     },
     "source_prefix": {
         "en": {
-            "label": "Source prefix (optional)",
-            "info": "A sequence used as the prefix of each samples."
+            "label": "System prompt (optional)",
+            "info": "A sequence used as the default system prompt."
         },
         "zh": {
-            "label": "前缀序列(非必填)",
-            "info": "作为每个输入样本前缀的序列"
+            "label": "系统提示词(非必填)",
+            "info": "默认使用的系统提示词"
         }
     },
     "dataset_dir": {
@@ -209,6 +217,16 @@ LOCALES = {
             "info": "采用的学习率调节器名称。"
         }
     },
+    "max_grad_norm": {
+        "en": {
+            "label": "Maximum gradient norm",
+            "info": "Norm for gradient clipping."
+        },
+        "zh": {
+            "label": "最大梯度范数",
+            "info": "用于梯度裁剪的范数。"
+        }
+    },
     "dev_ratio": {
         "en": {
             "label": "Dev ratio",
@@ -219,20 +237,10 @@ LOCALES = {
             "info": "验证集占全部样本的百分比。"
         }
     },
-    "fp16": {
-        "en": {
-            "label": "fp16",
-            "info": "Whether to use fp16 mixed precision training."
-        },
-        "zh": {
-            "label": "fp16",
-            "info": "是否启用 FP16 混合精度训练。"
-        }
-    },
     "logging_steps": {
         "en": {
             "label": "Logging steps",
-            "info": "Number of update steps between two logs."
+            "info": "Number of steps between two logs."
         },
         "zh": {
             "label": "日志间隔",
@@ -242,13 +250,71 @@ LOCALES = {
     "save_steps": {
         "en": {
             "label": "Save steps",
-            "info": "Number of updates steps between two checkpoints."
+            "info": "Number of steps between two checkpoints."
         },
         "zh": {
             "label": "保存间隔",
             "info": "每两次断点保存间的更新步数。"
         }
     },
+    "warmup_steps": {
+        "en": {
+            "label": "Warmup steps",
+            "info": "Number of steps used for warmup."
+        },
+        "zh": {
+            "label": "预热步数",
+            "info": "学习率预热采用的步数。"
+        }
+    },
+    "compute_type": {
+        "en": {
+            "label": "Compute type",
+            "info": "Whether to use fp16 or bf16 mixed precision training."
+        },
+        "zh": {
+            "label": "计算类型",
+            "info": "是否启用 FP16 或 BF16 混合精度训练。"
+        }
+    },
+    "lora_tab": {
+        "en": {
+            "label": "LoRA configurations"
+        },
+        "zh": {
+            "label": "LoRA 参数设置"
+        }
+    },
+    "lora_rank": {
+        "en": {
+            "label": "LoRA rank",
+            "info": "The rank of LoRA matrices."
+        },
+        "zh": {
+            "label": "LoRA 秩",
+            "info": "LoRA 矩阵的秩。"
+        }
+    },
+    "lora_dropout": {
+        "en": {
+            "label": "LoRA Dropout",
+            "info": "Dropout ratio of LoRA weights."
+        },
+        "zh": {
+            "label": "LoRA 随机丢弃",
+            "info": "LoRA 权重随机丢弃的概率。"
+        }
+    },
+    "lora_target": {
+        "en": {
+            "label": "LoRA modules (optional)",
+            "info": "The name(s) of target modules to apply LoRA. Use commas to separate multiple modules."
+        },
+        "zh": {
+            "label": "LoRA 作用层(非必填)",
+            "info": "应用 LoRA 的线性层名称。使用英文逗号分隔多个名称。"
+        }
+    },
     "start_btn": {
         "en": {
             "value": "Start"
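Note: every new component gets an en/zh entry keyed by its name, so a plain dictionary lookup resolves the visible text. For example, using only keys added in this hunk:

    assert LOCALES["warmup_steps"]["en"]["label"] == "Warmup steps"
    assert LOCALES["lora_rank"]["zh"]["label"] == "LoRA 秩"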
|
@ -323,6 +389,14 @@ LOCALES = {
|
||||||
"value": "模型未加载,请先加载模型。"
|
"value": "模型未加载,请先加载模型。"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
"prefix": {
|
||||||
|
"en": {
|
||||||
|
"placeholder": "System prompt (optional)"
|
||||||
|
},
|
||||||
|
"zh": {
|
||||||
|
"placeholder": "系统提示词(非必填)"
|
||||||
|
}
|
||||||
|
},
|
||||||
"query": {
|
"query": {
|
||||||
"en": {
|
"en": {
|
||||||
"placeholder": "Input..."
|
"placeholder": "Input..."
|
||||||
|
|
|
@@ -6,7 +6,7 @@ import transformers
 from typing import List, Optional, Tuple
 
 from llmtuner.extras.callbacks import LogCallback
-from llmtuner.extras.constants import DEFAULT_MODULE # will be deprecated
+from llmtuner.extras.constants import DEFAULT_MODULE
 from llmtuner.extras.logging import LoggerHandler
 from llmtuner.extras.misc import torch_gc
 from llmtuner.tuner import get_train_args, run_sft
@@ -77,10 +77,15 @@ class Runner:
         batch_size: int,
         gradient_accumulation_steps: int,
         lr_scheduler_type: str,
+        max_grad_norm: str,
         dev_ratio: float,
-        fp16: bool,
         logging_steps: int,
         save_steps: int,
+        warmup_steps: int,
+        compute_type: str,
+        lora_rank: int,
+        lora_dropout: float,
+        lora_target: str,
         output_dir: str
     ):
         model_name_or_path, error, logger_handler, trainer_callback = self.initialize(lang, model_name, dataset)
@@ -99,7 +104,6 @@ class Runner:
             model_name_or_path=model_name_or_path,
             do_train=True,
             overwrite_cache=True,
-            lora_target=DEFAULT_MODULE.get(model_name.split("-")[0], None) or "q_proj,v_proj",
             checkpoint_dir=checkpoint_dir,
             finetuning_type=finetuning_type,
             quantization_bit=int(quantization_bit) if quantization_bit else None,
@@ -115,9 +119,15 @@ class Runner:
             per_device_train_batch_size=batch_size,
             gradient_accumulation_steps=gradient_accumulation_steps,
             lr_scheduler_type=lr_scheduler_type,
-            fp16=fp16,
+            max_grad_norm=float(max_grad_norm),
             logging_steps=logging_steps,
             save_steps=save_steps,
+            warmup_steps=warmup_steps,
+            fp16=(compute_type == "fp16"),
+            bf16=(compute_type == "bf16"),
+            lora_rank=lora_rank,
+            lora_dropout=lora_dropout,
+            lora_target=lora_target or DEFAULT_MODULE.get(model_name.split("-")[0], "q_proj,v_proj"),
             output_dir=os.path.join(get_save_dir(model_name), finetuning_type, output_dir)
         )
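Note: the single `fp16` checkbox becomes a `compute_type` radio mapped onto the mutually exclusive `fp16`/`bf16` training flags, and the hard-coded `lora_target` default turns into a fallback behind the user's input. A condensed sketch of both mappings as they appear in the hunk above (`resolve_training_flags` is an illustrative name):

    DEFAULT_MODULE = {"LLaMA": "q_proj,v_proj", "LLaMA2": "q_proj,v_proj", "BLOOM": "query_key_value"}

    def resolve_training_flags(compute_type: str, lora_target: str, model_name: str) -> dict:
        # Exactly one of fp16/bf16 is True; an empty lora_target falls
        # back to the per-family default, then to "q_proj,v_proj".
        return dict(
            fp16=(compute_type == "fp16"),
            bf16=(compute_type == "bf16"),
            lora_target=lora_target or DEFAULT_MODULE.get(model_name.split("-")[0], "q_proj,v_proj"),
        )

    resolve_training_flags("bf16", "", "LLaMA2-7B")
    # -> {'fp16': False, 'bf16': True, 'lora_target': 'q_proj,v_proj'}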