diff --git a/tests/auto_gptq.py b/tests/auto_gptq.py
new file mode 100644
index 00000000..3fd2ab12
--- /dev/null
+++ b/tests/auto_gptq.py
@@ -0,0 +1,47 @@
+# coding=utf-8
+# Quantizes fine-tuned models with AutoGPTQ (https://github.com/PanQiWei/AutoGPTQ).
+# Usage: python auto_gptq.py --input_dir path_to_llama_model --output_dir path_to_quant_model --data_file alpaca.json
+# dataset format: instruction (string), output (string), history (List[List[str]], optional)
+
+
+import fire
+from datasets import load_dataset
+from transformers import AutoTokenizer
+from auto_gptq import AutoGPTQForCausalLM, BaseQuantizeConfig
+
+
+def quantize(input_dir: str, output_dir: str, data_file: str):
+    tokenizer = AutoTokenizer.from_pretrained(input_dir, use_fast=False, padding_side="left")
+
+    def format_example(examples):
+        prefix = ("A chat between a curious user and an artificial intelligence assistant. "
+                  "The assistant gives helpful, detailed, and polite answers to the user's questions.")
+        texts = []
+        for i in range(len(examples["instruction"])):
+            prompt = prefix + "\n"
+            if "history" in examples and examples["history"][i]:
+                for user_query, bot_resp in examples["history"][i]:
+                    prompt += "Human: {}\nAssistant: {}\n".format(user_query, bot_resp)
+            prompt += "Human: {}\nAssistant: {}".format(examples["instruction"][i], examples["output"][i])
+            texts.append(prompt)
+        return tokenizer(texts, truncation=True, max_length=1024)
+
+    dataset = load_dataset("json", data_files=data_file)["train"]
+    column_names = list(dataset.column_names)
+    dataset = dataset.select(range(1024))
+    dataset = dataset.map(format_example, batched=True, remove_columns=column_names)
+    dataset = dataset.shuffle()
+
+    quantize_config = BaseQuantizeConfig(
+        bits=4,
+        group_size=128,
+        desc_act=False
+    )
+
+    model = AutoGPTQForCausalLM.from_pretrained(input_dir, quantize_config)
+    model.quantize(dataset)
+    model.save_quantized(output_dir)
+
+
+if __name__ == "__main__":
+    fire.Fire(quantize)
diff --git a/evaluation/evaluate_zh.py b/tests/evaluate_zh.py
similarity index 93%
rename from evaluation/evaluate_zh.py
rename to tests/evaluate_zh.py
index e2bc67c2..b079cf7d 100644
--- a/evaluation/evaluate_zh.py
+++ b/tests/evaluate_zh.py
@@ -1,6 +1,7 @@
 # coding=utf-8
 # Evaluates fine-tuned models automatically.
-# Usage: python evaluate.py --evalset ceval/ceval-exam:law --split dev --api_base http://localhost:8000/v1 --task_type choice
+# Usage: python evaluate_zh.py --evalset ceval/ceval-exam:law --split dev --output_file result.json
+#        --api_base http://localhost:8000/v1 --task_type choice --n_samples 100
 # dataset format: question (string), A (string), B (string), C (string), D (string), answer (Literal["A", "B", "C", "D"])
 
 
@@ -75,6 +76,7 @@ EXT2TYPE = {
 def evaluate(
     evalset: str,
     api_base: str,
+    output_file: str,
     split: Optional[str] = "val",
     task_type: Optional[Literal["choice", "cloze", "openqa"]] = "choice",
     n_samples: Optional[int] = 20
@@ -122,7 +124,8 @@ def evaluate(
         })
 
     print("Result: {}/{}\nAccuracy: {:.2f}%".format(n_correct, n_samples, n_correct / n_samples * 100))
-    with open("result.json", "w", encoding="utf-8") as f:
+
+    with open(output_file, "w", encoding="utf-8") as f:
         json.dump(predictions, f, indent=2, ensure_ascii=False)
 
 
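
Not part of the patch: a minimal sketch of how the 4-bit checkpoint written by tests/auto_gptq.py could be loaded back for a quick sanity check, assuming AutoGPTQ's from_quantized API. The paths path_to_llama_model and path_to_quant_model are the same placeholder paths used in the script's usage comment, and the tokenizer is loaded from the original model directory because save_quantized() writes only the quantized weights and config.

# Illustrative only -- not included in this diff.
from transformers import AutoTokenizer
from auto_gptq import AutoGPTQForCausalLM

input_dir = "path_to_llama_model"   # original fine-tuned model (tokenizer source)
output_dir = "path_to_quant_model"  # directory written by auto_gptq.py

tokenizer = AutoTokenizer.from_pretrained(input_dir, use_fast=False)
model = AutoGPTQForCausalLM.from_quantized(output_dir, device="cuda:0")

# Generate a short reply in the same "Human:/Assistant:" format used for calibration.
inputs = tokenizer("Human: Hello, who are you?\nAssistant:", return_tensors="pt").to(model.device)
outputs = model.generate(**inputs, max_new_tokens=64)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))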