This commit is contained in:
parent 5021062493
commit 9ea9380145
@@ -324,7 +324,7 @@ CUDA_VISIBLE_DEVICES=0 python src/train_bash.py \
 ```
 
 > [!WARNING]
-> Use `--per_device_train_batch_size=1` for LLaMA-2 models in fp16 training.
+> Use `--per_device_train_batch_size=1` for LLaMA-2 models in fp16 PPO training.
 
 #### DPO Training
 
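The warning above applies to the PPO stage of `src/train_bash.py`. As a hedged illustration only, the same options can be passed programmatically through the `run_exp` entry point whose signature appears in the final hunk of this commit; the model path, dataset name, reward model and output directory below are placeholders, not values from this commit.

```python
# Hypothetical sketch: fp16 PPO training of a LLaMA-2 model with a per-device
# batch size of 1, as the updated README warning recommends.
# All paths, dataset names and hyperparameters are illustrative placeholders.
from llmtuner import run_exp  # assumes the package-level export used by src/train_bash.py

run_exp(args={
    "stage": "ppo",                          # PPO training stage
    "model_name_or_path": "meta-llama/Llama-2-7b-hf",
    "do_train": True,
    "dataset": "alpaca_gpt4_en",             # placeholder dataset name
    "finetuning_type": "lora",
    "reward_model": "path/to/reward_model",  # placeholder reward model checkpoint
    "output_dir": "path/to/ppo_output",
    "per_device_train_batch_size": 1,        # required for LLaMA-2 in fp16 PPO training
    "gradient_accumulation_steps": 4,
    "learning_rate": 1e-5,
    "num_train_epochs": 1.0,
    "fp16": True,
})
```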
@@ -324,7 +324,7 @@ CUDA_VISIBLE_DEVICES=0 python src/train_bash.py \
 ```
 
 > [!WARNING]
-> If you are training LLaMA-2 models in fp16 precision, please use `--per_device_eval_batch_size=1`.
+> If you are doing PPO training of LLaMA-2 models in fp16 precision, please use `--per_device_train_batch_size=1`.
 
 #### DPO Training
 
@@ -499,7 +499,7 @@ CUDA_VISIBLE_DEVICES=0 python src/train_bash.py \
 ```
 
 > [!WARNING]
-> If you are running inference with LLaMA-2 models in fp16 precision, please use `--per_device_eval_batch_size=1`.
+> If you are running prediction with LLaMA-2 models in fp16 precision, please use `--per_device_eval_batch_size=1`.
 
 > [!TIP]
 > We recommend using `--per_device_eval_batch_size=1` and `--max_target_length 128` for prediction with quantized models.
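These two hunks adjust the same warning and tip in the Chinese README. As a rough, hedged sketch of a run that honors both recommendations (the evaluation batch size of 1 and the shorter target length for quantized models), the options could again be passed as an `args` dict; every path and dataset name here is a placeholder.

```python
# Hypothetical sketch: prediction with a quantized LLaMA-2 checkpoint, following
# the README tip (--per_device_eval_batch_size=1 and --max_target_length 128).
# All paths and dataset names are illustrative placeholders.
from llmtuner import run_exp  # assumes the package-level export used by src/train_bash.py

run_exp(args={
    "stage": "sft",
    "model_name_or_path": "meta-llama/Llama-2-7b-hf",
    "do_predict": True,
    "dataset": "alpaca_gpt4_en",            # placeholder dataset name
    "checkpoint_dir": "path/to/lora_checkpoint",
    "output_dir": "path/to/predict_output",
    "per_device_eval_batch_size": 1,        # required for LLaMA-2 prediction in fp16
    "max_target_length": 128,               # keeps generations short for quantized models
    "quantization_bit": 4,                  # predict with a 4-bit quantized model
    "predict_with_generate": True,
    "fp16": True,
})
```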
@@ -354,6 +354,35 @@ register_template(
         "\n",
         "{{system}}"
     ],
+    prompt=[
+        {"token": "<|user|>"},
+        "\n",
+        "{{query}}",
+        {"token": "<|assistant|>"},
+        "\n" # add an extra newline to avoid error in ChatGLM's process_response method
+    ],
+    system=(
+        "You are ChatGLM3, a large language model trained by Zhipu.AI. "
+        "Follow the user's instructions carefully. Respond using markdown."
+    ),
+    sep=[],
+    stop_words=[
+        "<|user|>",
+        "<|observation|>"
+    ],
+    efficient_eos=True
+)
+
+
+register_template(
+    name="chatglm3_raw", # the raw template for tool tuning
+    prefix=[
+        {"token": "[gMASK]"},
+        {"token": "sop"},
+        {"token": "<|system|>"},
+        "\n",
+        "{{system}}"
+    ],
     prompt=[
         {"token": "<|user|>"},
         "\n",
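The added template interleaves literal strings with special-token placeholders. As a simplified sketch (the repository's real `register_template` and encoding logic is tokenizer-aware and more involved), the `prompt` list above expands for a single user turn roughly as shown below; `render` is an invented helper, not a function from the codebase.

```python
# Hypothetical illustration of how the chatglm3 prompt pieces expand for one turn.
# "render" is an invented helper; the real template machinery maps {"token": ...}
# entries to special token ids and strings to tokenized text.
def render(pieces, **values):
    out = []
    for piece in pieces:
        if isinstance(piece, dict):          # special token, e.g. <|user|>
            out.append(piece["token"])
        else:                                # literal text with {{placeholders}}
            text = piece
            for key, val in values.items():
                text = text.replace("{{" + key + "}}", val)
            out.append(text)
    return "".join(out)

prompt = [
    {"token": "<|user|>"},
    "\n",
    "{{query}}",
    {"token": "<|assistant|>"},
    "\n",  # extra newline, mirroring the comment in the added template
]

print(render(prompt, query="What is LoRA?"))
# -> "<|user|>\nWhat is LoRA?<|assistant|>\n"
```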
@ -65,7 +65,12 @@ def init_adapter(
|
||||||
checkpoint_to_resume = None
|
checkpoint_to_resume = None
|
||||||
|
|
||||||
if model_args.checkpoint_dir is not None:
|
if model_args.checkpoint_dir is not None:
|
||||||
if is_trainable and finetuning_args.resume_lora_training:
|
is_mergeable = True
|
||||||
|
if getattr(model, "quantization_method", None) == "gptq":
|
||||||
|
assert len(model_args.checkpoint_dir) == 1, "GPTQ quantized model only accepts a single checkpoint."
|
||||||
|
is_mergeable = False
|
||||||
|
|
||||||
|
if (is_trainable and finetuning_args.resume_lora_training) or (not is_mergeable):
|
||||||
checkpoints_to_merge, checkpoint_to_resume = model_args.checkpoint_dir[:-1], model_args.checkpoint_dir[-1]
|
checkpoints_to_merge, checkpoint_to_resume = model_args.checkpoint_dir[:-1], model_args.checkpoint_dir[-1]
|
||||||
else:
|
else:
|
||||||
checkpoints_to_merge = model_args.checkpoint_dir
|
checkpoints_to_merge = model_args.checkpoint_dir
|
||||||
|
|
|
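The change prevents LoRA checkpoints from being merged into a GPTQ-quantized base model: such a model accepts exactly one checkpoint, and that checkpoint is resumed rather than merged. A small standalone sketch of that decision follows; the names mirror the diff, but this is an illustration, not the repository function.

```python
# Standalone illustration of the merge-vs-resume decision added in this hunk.
# Variable names mirror the diff, but this is not the repository code.
from typing import List, Optional, Tuple

def split_checkpoints(
    checkpoint_dirs: Optional[List[str]],
    is_trainable: bool,
    resume_lora_training: bool,
    quantization_method: Optional[str],
) -> Tuple[List[str], Optional[str]]:
    checkpoints_to_merge: List[str] = []
    checkpoint_to_resume: Optional[str] = None

    if checkpoint_dirs is not None:
        is_mergeable = True
        if quantization_method == "gptq":
            # GPTQ weights cannot absorb merged LoRA deltas, so only one
            # checkpoint is allowed and it is resumed, never merged.
            assert len(checkpoint_dirs) == 1, "GPTQ quantized model only accepts a single checkpoint."
            is_mergeable = False

        if (is_trainable and resume_lora_training) or (not is_mergeable):
            checkpoints_to_merge, checkpoint_to_resume = checkpoint_dirs[:-1], checkpoint_dirs[-1]
        else:
            checkpoints_to_merge = checkpoint_dirs

    return checkpoints_to_merge, checkpoint_to_resume

# Example: a GPTQ model with one LoRA checkpoint resumes it instead of merging.
print(split_checkpoints(["ckpt_a"], is_trainable=True, resume_lora_training=False, quantization_method="gptq"))
# -> ([], 'ckpt_a')
```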
@@ -37,8 +37,13 @@ def run_exp(args: Optional[Dict[str, Any]] = None, callbacks: Optional[List["Tra
 def export_model(args: Optional[Dict[str, Any]] = None, max_shard_size: Optional[str] = "10GB"):
     model_args, _, finetuning_args, _ = get_infer_args(args)
     model, tokenizer = load_model_and_tokenizer(model_args, finetuning_args)
+
+    if getattr(model, "quantization_method", None) == "gptq":
+        raise ValueError("Cannot export a GPTQ quantized model.")
+
     model.config.use_cache = True
     model.save_pretrained(finetuning_args.export_dir, max_shard_size=max_shard_size)
 
     try:
         tokenizer.padding_side = "left" # restore padding side
         tokenizer.init_kwargs["padding_side"] = "left"
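With this guard, exporting fails fast for GPTQ-quantized models instead of writing out weights that could not be reloaded correctly. A hedged usage sketch of the export entry point shown above follows; the model, adapter and export paths are placeholders.

```python
# Hypothetical sketch of calling the export entry point shown in the hunk above.
# Paths are placeholders; a GPTQ-quantized base model would raise ValueError here.
from llmtuner import export_model  # assumes the package-level export present in this repo

export_model(args={
    "model_name_or_path": "meta-llama/Llama-2-7b-hf",   # placeholder base model
    "checkpoint_dir": "path/to/lora_checkpoint",        # placeholder LoRA adapter
    "finetuning_type": "lora",
    "export_dir": "path/to/merged_model",               # placeholder output directory
}, max_shard_size="10GB")
```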