From 4cff6a4ad55b24bf57db6be5cf817180c1ea5626 Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Wed, 19 Jun 2024 17:44:05 +0800 Subject: [PATCH] fix templates --- src/llamafactory/data/formatter.py | 2 +- src/llamafactory/data/template.py | 48 ++++++++----------------- src/llamafactory/train/trainer_utils.py | 3 +- 3 files changed, 18 insertions(+), 35 deletions(-) diff --git a/src/llamafactory/data/formatter.py b/src/llamafactory/data/formatter.py index 70be6a5a..ed9ba8b8 100644 --- a/src/llamafactory/data/formatter.py +++ b/src/llamafactory/data/formatter.py @@ -68,7 +68,7 @@ def default_tool_formatter(tools: List[Dict[str, Any]]) -> str: def default_tool_extractor(content: str) -> Union[str, List[Tuple[str, str]]]: - regex = re.compile(r"Action:\s*([a-zA-Z0-9_]+)\s*Action Input:\s*(.+?)(?=\s*Action:|$)", re.DOTALL) + regex = re.compile(r"Action:\s*([a-zA-Z0-9_]+)\s*Action Input:\s*(.+?)(?=\s*Action:|\s*$)", re.DOTALL) action_match: List[Tuple[str, str]] = re.findall(regex, content) if not action_match: return content diff --git a/src/llamafactory/data/template.py b/src/llamafactory/data/template.py index c9af9605..3c4bc5ec 100644 --- a/src/llamafactory/data/template.py +++ b/src/llamafactory/data/template.py @@ -478,11 +478,7 @@ _register_template( _register_template( name="breeze", format_user=StringFormatter(slots=["[INST] {{content}} [/INST] "]), - format_system=StringFormatter(slots=[{"bos_token"}, "{{content}}"]), - default_system=( - "You are a helpful AI assistant built by MediaTek Research. " - "The user you are helping speaks Traditional Chinese and comes from Taiwan." - ), + format_prefix=EmptyFormatter(slots=[{"bos_token"}]), efficient_eos=True, ) @@ -569,13 +565,8 @@ _register_template( ) ] ), - format_system=StringFormatter( - slots=[{"bos_token"}, "<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>{{content}}<|END_OF_TURN_TOKEN|>"] - ), - default_system=( - "You are Command-R, a brilliant, sophisticated, AI-assistant trained to assist human users " - "by providing thorough responses. You are trained by Cohere." - ), + format_system=StringFormatter(slots=["<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>{{content}}<|END_OF_TURN_TOKEN|>"]), + format_prefix=EmptyFormatter(slots=[{"bos_token"}]), ) @@ -645,8 +636,6 @@ _register_template( _register_template( name="empty", - format_user=StringFormatter(slots=["{{content}}"]), - format_assistant=StringFormatter(slots=["{{content}}"]), format_prefix=EmptyFormatter(slots=[{"bos_token"}]), efficient_eos=True, ) @@ -695,25 +684,21 @@ _register_template( _register_template( name="intern", - format_user=StringFormatter(slots=["<|User|>:{{content}}", {"token": ""}, "\n<|Bot|>:"]), - format_separator=EmptyFormatter(slots=[{"token": ""}, "\n"]), + format_user=StringFormatter(slots=["<|User|>:{{content}}\n<|Bot|>:"]), + format_system=StringFormatter(slots=["<|System|>:{{content}}\n"]), + format_separator=EmptyFormatter(slots=["\n"]), + format_prefix=EmptyFormatter(slots=[{"bos_token"}]), stop_words=[""], - efficient_eos=True, + efficient_eos=True, # internlm tokenizer cannot set eos_token_id ) _register_template( name="intern2", format_user=StringFormatter(slots=["<|im_start|>user\n{{content}}<|im_end|>\n<|im_start|>assistant\n"]), - format_system=StringFormatter(slots=[{"bos_token"}, "<|im_start|>system\n{{content}}<|im_end|>\n"]), - format_separator=EmptyFormatter(slots=["\n"]), - default_system=( - "You are an AI assistant whose name is InternLM (书生·浦语).\n" - "- InternLM (书生·浦语) is a conversational language model that is developed " - "by Shanghai AI Laboratory (上海人工智能实验室). It is designed to be helpful, honest, and harmless.\n" - "- InternLM (书生·浦语) can understand and communicate fluently in the language chosen " - "by the user such as English and 中文." - ), + format_system=StringFormatter(slots=["<|im_start|>system\n{{content}}<|im_end|>\n"]), + format_separator=EmptyFormatter(slots=["<|im_end|>\n"]), + format_prefix=EmptyFormatter(slots=[{"bos_token"}]), stop_words=["<|im_end|>"], efficient_eos=True, # internlm2 tokenizer cannot set eos_token_id ) @@ -722,7 +707,6 @@ _register_template( _register_template( name="llama2", format_user=StringFormatter(slots=[{"bos_token"}, "[INST] {{content}} [/INST]"]), - format_assistant=StringFormatter(slots=[" {{content}} ", {"eos_token"}]), format_system=StringFormatter(slots=["<>\n{{content}}\n<>\n\n"]), ) @@ -745,9 +729,7 @@ _register_template( ) ] ), - format_system=StringFormatter( - slots=[{"bos_token"}, "<|start_header_id|>system<|end_header_id|>\n\n{{content}}<|eot_id|>"] - ), + format_system=StringFormatter(slots=["<|start_header_id|>system<|end_header_id|>\n\n{{content}}<|eot_id|>"]), format_observation=StringFormatter( slots=[ ( @@ -756,7 +738,7 @@ _register_template( ) ] ), - default_system="You are a helpful assistant.", + format_prefix=EmptyFormatter(slots=[{"bos_token"}]), stop_words=["<|eot_id|>"], replace_eos=True, ) @@ -809,9 +791,9 @@ _register_template( _register_template( name="phi", format_user=StringFormatter(slots=["<|user|>\n{{content}}<|end|>\n<|assistant|>\n"]), - format_system=StringFormatter(slots=[{"bos_token"}, "<|system|>\n{{content}}<|end|>\n"]), + format_system=StringFormatter(slots=["<|system|>\n{{content}}<|end|>\n"]), format_separator=EmptyFormatter(slots=["\n"]), - default_system="You are a helpful AI assistant.", + format_prefix=EmptyFormatter(slots=[{"bos_token"}]), stop_words=["<|end|>"], replace_eos=True, ) diff --git a/src/llamafactory/train/trainer_utils.py b/src/llamafactory/train/trainer_utils.py index 9052c96d..c1b90155 100644 --- a/src/llamafactory/train/trainer_utils.py +++ b/src/llamafactory/train/trainer_utils.py @@ -198,6 +198,7 @@ def convert_pissa_adapter( safe_serialization=training_args.save_safetensors, ) setattr(unwrapped_model.peft_config["default"], "init_lora_weights", init_lora_weights) + elif output_dir == training_args.output_dir: # at the end of training logger.info("Converted PiSSA adapter will be saved at: {}.".format(output_dir)) unwrapped_model = accelerator.unwrap_model(model) @@ -233,7 +234,7 @@ def _create_galore_optimizer( finetuning_args: "FinetuningArguments", ) -> "torch.optim.Optimizer": if len(finetuning_args.galore_target) == 1 and finetuning_args.galore_target[0] == "all": - galore_targets = find_all_linear_modules(model) + galore_targets = find_all_linear_modules(model, finetuning_args.freeze_vision_tower) else: galore_targets = finetuning_args.galore_target