diff --git a/src/llmtuner/__init__.py b/src/llmtuner/__init__.py
index fde80959..895a2c48 100644
--- a/src/llmtuner/__init__.py
+++ b/src/llmtuner/__init__.py
@@ -7,4 +7,4 @@ from llmtuner.tuner import export_model, run_exp
 from llmtuner.webui import create_ui, create_web_demo
 
 
-__version__ = "0.2.1"
+__version__ = "0.2.2"
diff --git a/src/llmtuner/extras/constants.py b/src/llmtuner/extras/constants.py
index 6627e95d..95916b69 100644
--- a/src/llmtuner/extras/constants.py
+++ b/src/llmtuner/extras/constants.py
@@ -150,6 +150,14 @@ register_model_group(
 )
 
 
+register_model_group(
+    models={
+        "LingoWhale-8B": "deeplang-ai/LingoWhale-8B"
+    },
+    module="qkv_proj"
+)
+
+
 register_model_group(
     models={
         "LLaMA-7B": "huggyllama/llama-7b",
diff --git a/src/llmtuner/webui/runner.py b/src/llmtuner/webui/runner.py
index ab9e9ffc..18a8c475 100644
--- a/src/llmtuner/webui/runner.py
+++ b/src/llmtuner/webui/runner.py
@@ -136,7 +136,7 @@ class Runner:
             args["upcast_layernorm"] = True
 
         if args["stage"] == "ppo":
-            args["reward_model"] = get("train.reward_model")
+            args["reward_model"] = get_save_dir(get("top.model_name"), get("top.finetuning_type"), get("train.reward_model"))
 
         if args["stage"] == "dpo":
            args["dpo_beta"] = get("train.dpo_beta")
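
Context for the runner.py change: the PPO stage previously received only the raw checkpoint name selected in the web UI, so the trainer had no full path to the reward model; it now receives a path resolved against the current model name and finetuning type. Below is a minimal sketch of the assumed behavior of get_save_dir (the real helper lives in llmtuner.webui.common; the base directory and checkpoint names used here are illustrative assumptions, not the library's actual values):

import os

# Assumed save root for web UI checkpoints; the actual constant in
# llmtuner.webui.common may differ.
DEFAULT_SAVE_DIR = "saves"

def get_save_dir(*paths: str) -> str:
    # Sketch: join the save root with model name, finetuning type and
    # checkpoint name to form the reward model path passed to the PPO stage.
    return os.path.join(DEFAULT_SAVE_DIR, *paths)

# Hypothetical values: before the patch the trainer saw only "reward-ckpt";
# after the patch it sees a resolved path such as "saves/LLaMA2-7B/lora/reward-ckpt".
print(get_save_dir("LLaMA2-7B", "lora", "reward-ckpt"))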