add autogptq
This commit is contained in:
parent
b8e1f09a2e
commit
cf6d57fd3e
|
@ -0,0 +1,47 @@
|
||||||
|
# coding=utf-8
|
||||||
|
# Quantizes fine-tuned models with AutoGPTQ (https://github.com/PanQiWei/AutoGPTQ).
|
||||||
|
# Usage: python auto_gptq.py --input_dir path_to_llama_model --output_dir path_to_quant_model --data_file alpaca.json
|
||||||
|
# dataset format: instruction (string), output (string), history (optional list of [user query, assistant response] pairs)
|
||||||
|
|
||||||
|
|
||||||
|
import fire
|
||||||
|
from datasets import load_dataset
|
||||||
|
from transformers import AutoTokenizer
|
||||||
|
from auto_gptq import AutoGPTQForCausalLM, BaseQuantizeConfig
|
||||||
|
|
||||||
|
|
||||||
|
def quantize(
    input_dir: str,
    output_dir: str,
    data_file: str,
    n_samples: int = 1024,
    max_length: int = 1024,
):
    """Quantize a causal LM to 4-bit GPTQ using chat-formatted calibration data.

    Args:
        input_dir: Path (or hub id) of the model and tokenizer to quantize.
        output_dir: Directory the quantized model is written to.
        data_file: JSON data file whose records provide "instruction" and
            "output" fields and, optionally, a "history" list of
            (user query, assistant response) pairs.
        n_samples: Maximum number of leading records used for calibration.
            Clamped to the dataset size so small files do not fail.
        max_length: Token limit applied when truncating each calibration text.
    """
    tokenizer = AutoTokenizer.from_pretrained(input_dir, use_fast=False, padding_side="left")

    # System prompt prepended to every calibration transcript.
    prefix = (
        "A chat between a curious user and an artificial intelligence assistant. "
        "The assistant gives helpful, detailed, and polite answers to the user's questions."
    )

    def format_example(examples):
        """Render a batch of records as chat transcripts and tokenize them."""
        instructions = examples["instruction"]
        if "history" in examples:
            histories = examples["history"]
        else:
            histories = [None] * len(instructions)

        texts = []
        for instruction, output, history in zip(instructions, examples["output"], histories):
            pieces = [prefix + "\n"]
            # A row's history may be missing/None when only some records
            # define it; treat that the same as an empty conversation.
            for user_query, bot_resp in history or []:
                pieces.append("Human: {}\nAssistant: {}\n".format(user_query, bot_resp))
            pieces.append("Human: {}\nAssistant: {}".format(instruction, output))
            texts.append("".join(pieces))
        return tokenizer(texts, truncation=True, max_length=max_length)

    dataset = load_dataset("json", data_files=data_file)["train"]
    column_names = list(dataset.column_names)
    # Never request more rows than exist; an out-of-range select would raise.
    dataset = dataset.select(range(min(n_samples, len(dataset))))
    # Drop the raw text columns so only the tokenizer outputs remain.
    dataset = dataset.map(format_example, batched=True, remove_columns=column_names)
    dataset = dataset.shuffle()

    quantize_config = BaseQuantizeConfig(
        bits=4,
        group_size=128,
        desc_act=False,
    )

    model = AutoGPTQForCausalLM.from_pretrained(input_dir, quantize_config)
    model.quantize(dataset)
    model.save_quantized(output_dir)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Expose quantize() on the command line; its parameters become CLI flags
    # (see the usage line in the file header).
    fire.Fire(quantize)
|
|
@ -1,6 +1,7 @@
|
||||||
# coding=utf-8
|
# coding=utf-8
|
||||||
# Evaluates fine-tuned models automatically.
|
# Evaluates fine-tuned models automatically.
|
||||||
# Usage: python evaluate.py --evalset ceval/ceval-exam:law --split dev --api_base http://localhost:8000/v1 --task_type choice
|
# Usage: python evaluate_zh.py --evalset ceval/ceval-exam:law --split dev --output_file result.json
|
||||||
|
# --api_base http://localhost:8000/v1 --task_type choice --n_samples 100
|
||||||
# dataset format: question (string), A (string), B (string), C (string), D (string), answer (Literal["A", "B", "C", "D"])
|
# dataset format: question (string), A (string), B (string), C (string), D (string), answer (Literal["A", "B", "C", "D"])
|
||||||
|
|
||||||
|
|
||||||
|
@ -75,6 +76,7 @@ EXT2TYPE = {
|
||||||
def evaluate(
|
def evaluate(
|
||||||
evalset: str,
|
evalset: str,
|
||||||
api_base: str,
|
api_base: str,
|
||||||
|
output_file: str,
|
||||||
split: Optional[str] = "val",
|
split: Optional[str] = "val",
|
||||||
task_type: Optional[Literal["choice", "cloze", "openqa"]] = "choice",
|
task_type: Optional[Literal["choice", "cloze", "openqa"]] = "choice",
|
||||||
n_samples: Optional[int] = 20
|
n_samples: Optional[int] = 20
|
||||||
|
@ -122,7 +124,8 @@ def evaluate(
|
||||||
})
|
})
|
||||||
|
|
||||||
print("Result: {}/{}\nAccuracy: {:.2f}%".format(n_correct, n_samples, n_correct / n_samples * 100))
|
print("Result: {}/{}\nAccuracy: {:.2f}%".format(n_correct, n_samples, n_correct / n_samples * 100))
|
||||||
with open("result.json", "w", encoding="utf-8") as f:
|
|
||||||
|
with open(output_file, "w", encoding="utf-8") as f:
|
||||||
json.dump(predictions, f, indent=2, ensure_ascii=False)
|
json.dump(predictions, f, indent=2, ensure_ascii=False)
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue