diff --git a/README.md b/README.md
index 522c0d82..80bddd00 100644
--- a/README.md
+++ b/README.md
@@ -324,7 +324,7 @@ CUDA_VISIBLE_DEVICES=0 python src/train_bash.py \
 ```
 
 > [!WARNING]
-> Use `--per_device_train_batch_size=1` for LLaMA-2 models in fp16 training.
+> Use `--per_device_train_batch_size=1` for LLaMA-2 models in fp16 PPO training.
 
 #### DPO Training
 
diff --git a/README_zh.md b/README_zh.md
index d2ec3097..be71d344 100644
--- a/README_zh.md
+++ b/README_zh.md
@@ -324,7 +324,7 @@ CUDA_VISIBLE_DEVICES=0 python src/train_bash.py \
 ```
 
 > [!WARNING]
-> If training LLaMA-2 models in fp16 precision, use `--per_device_eval_batch_size=1`.
+> For PPO training of LLaMA-2 models in fp16 precision, use `--per_device_train_batch_size=1`.
 
 #### DPO Training
 
@@ -499,7 +499,7 @@ CUDA_VISIBLE_DEVICES=0 python src/train_bash.py \
 ```
 
 > [!WARNING]
-> If running inference with LLaMA-2 models in fp16 precision, use `--per_device_eval_batch_size=1`.
+> For prediction with LLaMA-2 models in fp16 precision, use `--per_device_eval_batch_size=1`.
 
 > [!TIP]
 > We recommend using `--per_device_eval_batch_size=1` and `--max_target_length 128` for prediction of quantized models.
diff --git a/src/llmtuner/data/template.py b/src/llmtuner/data/template.py
index b852eb3d..2e1f31c7 100644
--- a/src/llmtuner/data/template.py
+++ b/src/llmtuner/data/template.py
@@ -354,6 +354,35 @@ register_template(
         "\n",
         "{{system}}"
     ],
+    prompt=[
+        {"token": "<|user|>"},
+        "\n",
+        "{{query}}",
+        {"token": "<|assistant|>"},
+        "\n"  # add an extra newline to avoid an error in ChatGLM's process_response method
+    ],
+    system=(
+        "You are ChatGLM3, a large language model trained by Zhipu.AI. "
+        "Follow the user's instructions carefully. Respond using markdown."
+    ),
+    sep=[],
+    stop_words=[
+        "<|user|>",
+        "<|observation|>"
+    ],
+    efficient_eos=True
+)
+
+
+register_template(
+    name="chatglm3_raw",  # the raw template for tool tuning
+    prefix=[
+        {"token": "[gMASK]"},
+        {"token": "sop"},
+        {"token": "<|system|>"},
+        "\n",
+        "{{system}}"
+    ],
     prompt=[
         {"token": "<|user|>"},
         "\n",
diff --git a/src/llmtuner/model/adapter.py b/src/llmtuner/model/adapter.py
index b7fe78a0..9a3fb3f6 100644
--- a/src/llmtuner/model/adapter.py
+++ b/src/llmtuner/model/adapter.py
@@ -65,7 +65,12 @@ def init_adapter(
         checkpoint_to_resume = None
 
         if model_args.checkpoint_dir is not None:
-            if is_trainable and finetuning_args.resume_lora_training:
+            is_mergeable = True
+            if getattr(model, "quantization_method", None) == "gptq":
+                assert len(model_args.checkpoint_dir) == 1, "GPTQ quantized model only accepts a single checkpoint."
+                is_mergeable = False
+
+            if (is_trainable and finetuning_args.resume_lora_training) or (not is_mergeable):
                 checkpoints_to_merge, checkpoint_to_resume = model_args.checkpoint_dir[:-1], model_args.checkpoint_dir[-1]
             else:
                 checkpoints_to_merge = model_args.checkpoint_dir
diff --git a/src/llmtuner/train/tuner.py b/src/llmtuner/train/tuner.py
index 361dafb8..5833ee9e 100644
--- a/src/llmtuner/train/tuner.py
+++ b/src/llmtuner/train/tuner.py
@@ -37,8 +37,13 @@ def run_exp(args: Optional[Dict[str, Any]] = None, callbacks: Optional[List["Tra
 def export_model(args: Optional[Dict[str, Any]] = None, max_shard_size: Optional[str] = "10GB"):
     model_args, _, finetuning_args, _ = get_infer_args(args)
     model, tokenizer = load_model_and_tokenizer(model_args, finetuning_args)
+
+    if getattr(model, "quantization_method", None) == "gptq":
+        raise ValueError("Cannot export a GPTQ quantized model.")
+
     model.config.use_cache = True
     model.save_pretrained(finetuning_args.export_dir, max_shard_size=max_shard_size)
+
     try:
         tokenizer.padding_side = "left"  # restore padding side
         tokenizer.init_kwargs["padding_side"] = "left"
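For readers skimming the template.py hunk: the rough illustration below shows the conversation layout the new `chatglm3` template describes. This is an assumption-laden sketch for readability, not the repository's rendering code; each `{"token": ...}` entry actually maps to a single special token id rather than literal text.

```python
# Sketch of the turn layout encoded by the chatglm3 template above.
# The special tokens are shown as literal strings for readability only.
system = (
    "You are ChatGLM3, a large language model trained by Zhipu.AI. "
    "Follow the user's instructions carefully. Respond using markdown."
)
query = "Hello!"

rendered = (
    "[gMASK]" + "sop" + "<|system|>\n" + system  # prefix
    + "<|user|>\n" + query                       # prompt (user turn)
    + "<|assistant|>\n"                          # extra newline kept for ChatGLM's process_response
)
print(rendered)
```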
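The adapter.py hunk is easiest to read as a checkpoint-splitting rule: GPTQ bases keep packed integer weights, so LoRA deltas cannot be merged into them and the single adapter checkpoint must stay separate. Here is a minimal standalone sketch of that rule under the semantics visible in the diff; `split_checkpoints` and its signature are hypothetical, not repository code.

```python
from typing import List, Optional, Tuple

def split_checkpoints(
    checkpoint_dir: List[str],
    is_trainable: bool,
    resume_lora_training: bool,
    quantization_method: Optional[str] = None,
) -> Tuple[List[str], Optional[str]]:
    """Hypothetical helper mirroring the new logic in init_adapter."""
    is_mergeable = True
    if quantization_method == "gptq":
        # LoRA weights cannot be merged into GPTQ-packed layers, so only a
        # single, unmerged adapter checkpoint is supported.
        assert len(checkpoint_dir) == 1, "GPTQ quantized model only accepts a single checkpoint."
        is_mergeable = False

    if (is_trainable and resume_lora_training) or (not is_mergeable):
        # Merge every checkpoint except the last; the last one is resumed
        # (or, on a GPTQ base, loaded as a standalone adapter).
        return checkpoint_dir[:-1], checkpoint_dir[-1]
    return checkpoint_dir, None

# On a GPTQ base, the sole checkpoint is never merged:
assert split_checkpoints(
    ["ckpt"], is_trainable=False, resume_lora_training=False,
    quantization_method="gptq",
) == ([], "ckpt")
```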
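Likewise, the tuner.py guard simply refuses to export GPTQ bases, since their packed weights cannot be saved as a merged plain-weight model. A hedged sketch of the behavior follows, using a stand-in object instead of a real model; in the actual code path, `quantization_method` is set on the model by the transformers GPTQ integration.

```python
# Stand-in object demonstrating the attribute the guard inspects.
class _FakeGPTQModel:
    quantization_method = "gptq"

def _check_exportable(model) -> None:
    # Same check as the new guard in export_model.
    if getattr(model, "quantization_method", None) == "gptq":
        raise ValueError("Cannot export a GPTQ quantized model.")

try:
    _check_exportable(_FakeGPTQModel())
except ValueError as err:
    print(err)  # -> Cannot export a GPTQ quantized model.
```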