support GPTQ tuning #729 #1481 #1545 , fix chatglm template #1453 #1480 #1569

This commit is contained in:
hiyouga 2023-11-20 22:52:11 +08:00
parent 5021062493
commit 9ea9380145
5 changed files with 43 additions and 4 deletions

View File

@ -324,7 +324,7 @@ CUDA_VISIBLE_DEVICES=0 python src/train_bash.py \
```
> [!WARNING]
> Use `--per_device_train_batch_size=1` for LLaMA-2 models in fp16 training.
> Use `--per_device_train_batch_size=1` for LLaMA-2 models in fp16 PPO training.
#### DPO Training

View File

@ -324,7 +324,7 @@ CUDA_VISIBLE_DEVICES=0 python src/train_bash.py \
```
> [!WARNING]
> 如果在 fp16 精度下训练 LLaMA-2 模型,请使用 `--per_device_eval_batch_size=1`。
> 如果使用 fp16 精度进行 LLaMA-2 模型的 PPO 训练,请使用 `--per_device_train_batch_size=1`。
#### DPO 训练
@ -499,7 +499,7 @@ CUDA_VISIBLE_DEVICES=0 python src/train_bash.py \
```
> [!WARNING]
> 如果在 fp16 精度下推理 LLaMA-2 模型,请使用 `--per_device_eval_batch_size=1`
> 如果使用 fp16 精度进行 LLaMA-2 模型的预测,请使用 `--per_device_eval_batch_size=1`。
> [!TIP]
> 我们建议在量化模型的预测中使用 `--per_device_eval_batch_size=1` 和 `--max_target_length 128`。

View File

@ -354,6 +354,35 @@ register_template(
"\n",
"{{system}}"
],
prompt=[
{"token": "<|user|>"},
"\n",
"{{query}}",
{"token": "<|assistant|>"},
"\n" # add an extra newline to avoid error in ChatGLM's process_response method
],
system=(
"You are ChatGLM3, a large language model trained by Zhipu.AI. "
"Follow the user's instructions carefully. Respond using markdown."
),
sep=[],
stop_words=[
"<|user|>",
"<|observation|>"
],
efficient_eos=True
)
register_template(
name="chatglm3_raw", # the raw template for tool tuning
prefix=[
{"token": "[gMASK]"},
{"token": "sop"},
{"token": "<|system|>"},
"\n",
"{{system}}"
],
prompt=[
{"token": "<|user|>"},
"\n",

View File

@ -65,7 +65,12 @@ def init_adapter(
checkpoint_to_resume = None
if model_args.checkpoint_dir is not None:
if is_trainable and finetuning_args.resume_lora_training:
is_mergeable = True
if getattr(model, "quantization_method", None) == "gptq":
assert len(model_args.checkpoint_dir) == 1, "GPTQ quantized model only accepts a single checkpoint."
is_mergeable = False
if (is_trainable and finetuning_args.resume_lora_training) or (not is_mergeable):
checkpoints_to_merge, checkpoint_to_resume = model_args.checkpoint_dir[:-1], model_args.checkpoint_dir[-1]
else:
checkpoints_to_merge = model_args.checkpoint_dir

View File

@ -37,8 +37,13 @@ def run_exp(args: Optional[Dict[str, Any]] = None, callbacks: Optional[List["Tra
def export_model(args: Optional[Dict[str, Any]] = None, max_shard_size: Optional[str] = "10GB"):
model_args, _, finetuning_args, _ = get_infer_args(args)
model, tokenizer = load_model_and_tokenizer(model_args, finetuning_args)
if getattr(model, "quantization_method", None) == "gptq":
raise ValueError("Cannot export a GPTQ quantized model.")
model.config.use_cache = True
model.save_pretrained(finetuning_args.export_dir, max_shard_size=max_shard_size)
try:
tokenizer.padding_side = "left" # restore padding side
tokenizer.init_kwargs["padding_side"] = "left"