initial code

2024-05-12 14:33:00 +08:00 · 2024-05-12 14:33:00 +08:00 · df192611e3
parent 4cb5bdafe6
commit df192611e3
194 changed files with 2459655 additions and 0 deletions
--- a/CITATION.cff
+++ b/CITATION.cff
@ -0,0 +1,37 @@
+cff-version: 1.2.0
+date-released: 2024-03
+message: "If you use this software, please cite it as below."
+authors:
+- family-names: "Zheng"
+  given-names: "Yaowei"
+- family-names: "Zhang"
+  given-names: "Richong"
+- family-names: "Zhang"
+  given-names: "Junhao"
+- family-names: "Ye"
+  given-names: "Yanhan"
+- family-names: "Luo"
+  given-names: "Zheyan"
+- family-names: "Ma"
+  given-names: "Yongqiang"
+title: "LlamaFactory: Unified Efficient Fine-Tuning of 100+ Language Models"
+url: "https://arxiv.org/abs/2403.13372"
+preferred-citation:
+  type: article
+  authors:
+    - family-names: "Zheng"
+      given-names: "Yaowei"
+    - family-names: "Zhang"
+      given-names: "Richong"
+    - family-names: "Zhang"
+      given-names: "Junhao"
+    - family-names: "Ye"
+      given-names: "Yanhan"
+    - family-names: "Luo"
+      given-names: "Zheyan"
+    - family-names: "Ma"
+      given-names: "Yongqiang"
+  journal: "arXiv preprint arXiv:2403.13372"
+  title: "LlamaFactory: Unified Efficient Fine-Tuning of 100+ Language Models"
+  url: "https://arxiv.org/abs/2403.13372"
+  year: 2024
--- a/11
+++ b/11
@ -0,0 +1,11 @@
+# Both targets are commands rather than files, so declare them phony.
+.PHONY: quality style
+
+# Directories handed to ruff by every recipe below.
+check_dirs := scripts src tests
+
+# Check-only pass: run the linter and the formatter in --check mode.
+quality:
+	ruff check $(check_dirs)
+	ruff format --check $(check_dirs)
+
+# Fixing pass: run the linter with --fix, then run the formatter.
+style:
+	ruff check $(check_dirs) --fix
+	ruff format $(check_dirs)
--- a/data/README.md
+++ b/data/README.md
@ -0,0 +1,135 @@
+If you are using a custom dataset, please provide your dataset definition in the following format in `dataset_info.json`.
+
+```json
+"dataset_name": {
+  "hf_hub_url": "the name of the dataset repository on the Hugging Face hub. (if specified, ignore script_url and file_name)",
+  "ms_hub_url": "the name of the dataset repository on the ModelScope hub. (if specified, ignore script_url and file_name)",
+  "script_url": "the name of the directory containing a dataset loading script. (if specified, ignore file_name)",
+  "file_name": "the name of the dataset file in this directory. (required if above are not specified)",
+  "file_sha1": "the SHA-1 hash value of the dataset file. (optional, does not affect training)",
+  "subset": "the name of the subset. (optional, default: None)",
+  "folder": "the name of the folder of the dataset repository on the Hugging Face hub. (optional, default: None)",
+  "ranking": "whether the dataset is a preference dataset or not. (default: false)",
+  "formatting": "the format of the dataset. (optional, default: alpaca, can be chosen from {alpaca, sharegpt})",
+  "columns (optional)": {
+    "prompt": "the column name in the dataset containing the prompts. (default: instruction)",
+    "query": "the column name in the dataset containing the queries. (default: input)",
+    "response": "the column name in the dataset containing the responses. (default: output)",
+    "history": "the column name in the dataset containing the histories. (default: None)",
+    "messages": "the column name in the dataset containing the messages. (default: conversations)",
+    "system": "the column name in the dataset containing the system prompts. (default: None)",
+    "tools": "the column name in the dataset containing the tool description. (default: None)",
+    "images": "the column name in the dataset containing the image inputs. (default: None)"
+  },
+  "tags (optional, used for the sharegpt format)": {
+    "role_tag": "the key in the message that represents the sender's identity. (default: from)",
+    "content_tag": "the key in the message that represents the content. (default: value)",
+    "user_tag": "the value of the role_tag that represents the user. (default: human)",
+    "assistant_tag": "the value of the role_tag that represents the assistant. (default: gpt)",
+    "observation_tag": "the value of the role_tag that represents the tool results. (default: observation)",
+    "function_tag": "the value of the role_tag that represents the function call. (default: function_call)",
+    "system_tag": "the value of the role_tag that represents the system prompt. (default: system, can override system column)"
+  }
+}
+```
+
+Once the dataset is defined as above, you can use it by specifying `--dataset dataset_name`.
+
+----
+
+Currently we support datasets in the **alpaca** or **sharegpt** format. A dataset in the alpaca format should be organized as follows:
+
+```json
+[
+  {
+    "instruction": "user instruction (required)",
+    "input": "user input (optional)",
+    "output": "model response (required)",
+    "system": "system prompt (optional)",
+    "history": [
+      ["user instruction in the first round (optional)", "model response in the first round (optional)"],
+      ["user instruction in the second round (optional)", "model response in the second round (optional)"]
+    ]
+  }
+]
+```
+
+For a dataset in the above format, the `columns` in `dataset_info.json` should be:
+
+```json
+"dataset_name": {
+  "columns": {
+    "prompt": "instruction",
+    "query": "input",
+    "response": "output",
+    "system": "system",
+    "history": "history"
+  }
+}
+```
+
+The `query` column will be concatenated with the `prompt` column and used as the user prompt, i.e. the user prompt will be `prompt\nquery`. The `response` column represents the model response.
+
+The `system` column will be used as the system prompt. The `history` column is a list of string tuples, each representing one prompt-response pair in the history. Note that the responses in the history **will also be used for training**.
+
+For the pre-training datasets, only the `prompt` column will be used for training.
+
+For the preference datasets, the `response` column should be a list of exactly two strings, with the preferred answer appearing first, for example:
+
+```json
+{
+  "instruction": "user instruction",
+  "input": "user input",
+  "output": [
+    "chosen answer",
+    "rejected answer"
+  ]
+}
+```
+
+Remember to set `"ranking": true` for the preference datasets.
+
+----
+
+A dataset in the sharegpt format should be organized as follows:
+
+```json
+[
+  {
+    "conversations": [
+      {
+        "from": "human",
+        "value": "user instruction"
+      },
+      {
+        "from": "gpt",
+        "value": "model response"
+      }
+    ],
+    "system": "system prompt (optional)",
+    "tools": "tool description (optional)"
+  }
+]
+```
+
+For a dataset in the above format, the `columns` in `dataset_info.json` should be:
+
+```json
+"dataset_name": {
+  "columns": {
+    "messages": "conversations",
+    "system": "system",
+    "tools": "tools"
+  },
+  "tags": {
+    "role_tag": "from",
+    "content_tag": "value",
+    "user_tag": "human",
+    "assistant_tag": "gpt"
+  }
+}
+```
+
+where the `messages` column should be a list whose entries alternate between user and assistant messages, i.e. follow the `u/a/u/a/u/a` order.
+
+Pre-training datasets and preference datasets are not yet compatible with the sharegpt format.
--- a/data/README_zh.md
+++ b/data/README_zh.md
@ -0,0 +1,135 @@
+如果您使用自定义数据集，请务必在 `dataset_info.json` 文件中按照以下格式提供数据集定义。
+
+```json
+"数据集名称": {
+  "hf_hub_url": "Hugging Face 的数据集仓库地址（若指定，则忽略 script_url 和 file_name）",
+  "ms_hub_url": "ModelScope 的数据集仓库地址（若指定，则忽略 script_url 和 file_name）",
+  "script_url": "包含数据加载脚本的本地文件夹名称（若指定，则忽略 file_name）",
+  "file_name": "该目录下数据集文件的名称（若上述参数未指定，则此项必需）",
+  "file_sha1": "数据集文件的 SHA-1 哈希值（可选，留空不影响训练）",
+  "subset": "数据集子集的名称（可选，默认：None）",
+  "folder": "Hugging Face 仓库的文件夹名称（可选，默认：None）",
+  "ranking": "是否为偏好数据集（可选，默认：False）",
+  "formatting": "数据集格式（可选，默认：alpaca，可以为 alpaca 或 sharegpt）",
+  "columns（可选）": {
+    "prompt": "数据集代表提示词的表头名称（默认：instruction）",
+    "query": "数据集代表请求的表头名称（默认：input）",
+    "response": "数据集代表回答的表头名称（默认：output）",
+    "history": "数据集代表历史对话的表头名称（默认：None）",
+    "messages": "数据集代表消息列表的表头名称（默认：conversations）",
+    "system": "数据集代表系统提示的表头名称（默认：None）",
+    "tools": "数据集代表工具描述的表头名称（默认：None）",
+    "images": "数据集代表图像输入的表头名称（默认：None）"
+  },
+  "tags（可选，用于 sharegpt 格式）": {
+    "role_tag": "消息中代表发送者身份的键名（默认：from）",
+    "content_tag": "消息中代表文本内容的键名（默认：value）",
+    "user_tag": "消息中代表用户的 role_tag（默认：human）",
+    "assistant_tag": "消息中代表助手的 role_tag（默认：gpt）",
+    "observation_tag": "消息中代表工具返回结果的 role_tag（默认：observation）",
+    "function_tag": "消息中代表工具调用的 role_tag（默认：function_call）",
+    "system_tag": "消息中代表系统提示的 role_tag（默认：system，会覆盖 system 列）"
+  }
+}
+```
+
+添加后可通过指定 `--dataset 数据集名称` 参数使用自定义数据集。
+
+----
+
+该项目目前支持两种格式的数据集：**alpaca** 和 **sharegpt**，其中 alpaca 格式的数据集按照以下方式组织：
+
+```json
+[
+  {
+    "instruction": "用户指令（必填）",
+    "input": "用户输入（选填）",
+    "output": "模型回答（必填）",
+    "system": "系统提示词（选填）",
+    "history": [
+      ["第一轮指令（选填）", "第一轮回答（选填）"],
+      ["第二轮指令（选填）", "第二轮回答（选填）"]
+    ]
+  }
+]
+```
+
+对于上述格式的数据，`dataset_info.json` 中的 `columns` 应为：
+
+```json
+"数据集名称": {
+  "columns": {
+    "prompt": "instruction",
+    "query": "input",
+    "response": "output",
+    "system": "system",
+    "history": "history"
+  }
+}
+```
+
+其中 `query` 列对应的内容会与 `prompt` 列对应的内容拼接后作为用户指令，即用户指令为 `prompt\nquery`。`response` 列对应的内容为模型回答。
+
+`system` 列对应的内容将被作为系统提示词。`history` 列是由多个字符串二元组构成的列表，分别代表历史消息中每轮的指令和回答。注意历史消息中的回答**也会被用于训练**。
+
+对于预训练数据集，仅 `prompt` 列中的内容会用于模型训练。
+
+对于偏好数据集，`response` 列应当是一个长度为 2 的字符串列表，排在前面的代表更优的回答，例如：
+
+```json
+{
+  "instruction": "用户指令",
+  "input": "用户输入",
+  "output": [
+    "优质回答",
+    "劣质回答"
+  ]
+}
+```
+
+添加偏好数据集需要额外指定 `"ranking": true`。
+
+----
+
+而 sharegpt 格式的数据集按照以下方式组织：
+
+```json
+[
+  {
+    "conversations": [
+      {
+        "from": "human",
+        "value": "用户指令"
+      },
+      {
+        "from": "gpt",
+        "value": "模型回答"
+      }
+    ],
+    "system": "系统提示词（选填）",
+    "tools": "工具描述（选填）"
+  }
+]
+```
+
+对于上述格式的数据，`dataset_info.json` 中的 `columns` 应为：
+
+```json
+"数据集名称": {
+  "columns": {
+    "messages": "conversations",
+    "system": "system",
+    "tools": "tools"
+  },
+  "tags": {
+    "role_tag": "from",
+    "content_tag": "value",
+    "user_tag": "human",
+    "assistant_tag": "gpt"
+  }
+}
+```
+
+其中 `messages` 列应当是一个列表，且符合 `用户/模型/用户/模型/用户/模型` 的顺序。
+
+预训练数据集和偏好数据集尚不支持 sharegpt 格式。
--- a/data/alpaca_data_en_52k.json
+++ b/data/alpaca_data_en_52k.json
--- a/data/alpaca_data_zh_51k.json
+++ b/data/alpaca_data_zh_51k.json
--- a/data/alpaca_gpt4_data_en.json
+++ b/data/alpaca_gpt4_data_en.json
--- a/data/alpaca_gpt4_data_zh.json
+++ b/data/alpaca_gpt4_data_zh.json
--- a/data/belle_multiturn/belle_multiturn.py
+++ b/data/belle_multiturn/belle_multiturn.py
@ -0,0 +1,67 @@
+import json
+import os
+
+import datasets
+
+
+_HF_ENDPOINT = os.getenv("HF_ENDPOINT", "https://huggingface.co")
+
+_DESCRIPTION = "BELLE multiturn chat dataset."
+
+_CITATION = """\
+@article{belle2023exploring,
+  title={Exploring the Impact of Instruction Data Scaling on Large Language Models: An Empirical Study on Real-World Use Cases},
+  author={Yunjie Ji, Yong Deng, Yan Gong, Yiping Peng, Qiang Niu, Lei Zhang, Baochang Ma, Xiangang Li},
+  journal={arXiv preprint arXiv:2303.14742},
+  year={2023}
+}
+"""
+
+_HOMEPAGE = "{}/datasets/BelleGroup/multiturn_chat_0.8M".format(_HF_ENDPOINT)
+_LICENSE = "gpl-3.0"
+_URL = "{}/datasets/BelleGroup/multiturn_chat_0.8M/resolve/main/multiturn_chat_0.8M.json".format(_HF_ENDPOINT)
+
+
+class BelleMultiturn(datasets.GeneratorBasedBuilder):
+    VERSION = datasets.Version("0.0.0")
+
+    def _info(self):
+        features = datasets.Features(
+            {"conversations": [{"from": datasets.Value("string"), "value": datasets.Value("string")}]}
+        )
+        return datasets.DatasetInfo(
+            description=_DESCRIPTION, features=features, homepage=_HOMEPAGE, license=_LICENSE, citation=_CITATION
+        )
+
+    def _split_generators(self, dl_manager: datasets.DownloadManager):
+        file_path = dl_manager.download(_URL)
+        return [datasets.SplitGenerator(name=datasets.Split.TRAIN, gen_kwargs={"filepath": file_path})]
+
+    def _generate_examples(self, filepath: str):
+        with open(filepath, "r", encoding="utf-8") as f:
+            for key, row in enumerate(f):
+                data = json.loads(row)
+                conversations = []
+                prompt = data["instruction"].strip()
+                response = data["output"].strip()
+
+                assist_idx = prompt.rfind("Assistant:")
+                human_idx = prompt.rfind("Human:")
+                query = prompt[human_idx + 6 : assist_idx].strip()
+                prompt = prompt[:human_idx].strip()
+                conversations.insert(0, {"from": "gpt", "value": response})
+                conversations.insert(0, {"from": "human", "value": query})
+
+                while prompt.rfind("Assistant:") != -1:
+                    assist_idx = prompt.rfind("Assistant:")
+                    human_idx = prompt.rfind("Human:")
+                    if human_idx != -1:
+                        old_query = prompt[human_idx + 6 : assist_idx].strip()
+                        old_resp = prompt[assist_idx + 10 :].strip()
+                        conversations.insert(0, {"from": "gpt", "value": old_resp})
+                        conversations.insert(0, {"from": "human", "value": old_query})
+                    else:
+                        break
+                    prompt = prompt[:human_idx].strip()
+
+                yield key, {"conversations": conversations}
--- a/data/c4_demo.json
+++ b/data/c4_demo.json
--- a/data/comparison_gpt4_data_en.json
+++ b/data/comparison_gpt4_data_en.json
--- a/data/comparison_gpt4_data_zh.json
+++ b/data/comparison_gpt4_data_zh.json
--- a/data/dataset_info.json
+++ b/data/dataset_info.json
@ -0,0 +1,462 @@
+{
+  "alpaca_en": {
+    "file_name": "alpaca_data_en_52k.json",
+    "file_sha1": "607f94a7f581341e59685aef32f531095232cf23"
+  },
+  "alpaca_zh": {
+    "file_name": "alpaca_data_zh_51k.json",
+    "file_sha1": "2ba9827122c158dc256668d42bd1bcb8bc6b786e"
+  },
+  "alpaca_gpt4_en": {
+    "file_name": "alpaca_gpt4_data_en.json",
+    "file_sha1": "647f4ad447bd993e4b6b6223d1be15208bab694a"
+  },
+  "alpaca_gpt4_zh": {
+    "file_name": "alpaca_gpt4_data_zh.json",
+    "file_sha1": "3eaa3bda364ccdd59925d7448a698256c31ef845"
+  },
+  "identity": {
+    "file_name": "identity.json",
+    "file_sha1": "ffe3ecb58ab642da33fbb514d5e6188f1469ad40"
+  },
+  "oaast_sft": {
+    "file_name": "oaast_sft.json",
+    "file_sha1": "7baf5d43e67a91f9bbdf4e400dbe033b87e9757e",
+    "columns": {
+      "prompt": "instruction",
+      "query": "input",
+      "response": "output",
+      "history": "history"
+    }
+  },
+  "oaast_sft_zh": {
+    "file_name": "oaast_sft_zh.json",
+    "file_sha1": "a6a91f18f80f37b10ded9cf633fb50c033bf7b9f",
+    "columns": {
+      "prompt": "instruction",
+      "query": "input",
+      "response": "output",
+      "history": "history"
+    }
+  },
+  "lima": {
+    "file_name": "lima.json",
+    "file_sha1": "9db59f6b7007dc4b17529fc63379b9cd61640f37",
+    "columns": {
+      "prompt": "instruction",
+      "query": "input",
+      "response": "output",
+      "history": "history"
+    }
+  },
+  "glaive_toolcall": {
+    "file_name": "glaive_toolcall_10k.json",
+    "file_sha1": "a6917b85d209df98d31fdecb253c79ebc440f6f3",
+    "formatting": "sharegpt",
+    "columns": {
+      "messages": "conversations",
+      "tools": "tools"
+    }
+  },
+  "mllm_demo": {
+    "file_name": "mllm_demo.json",
+    "file_sha1": "d626cc0ad88a26d0dc9fcb47336821cf486d8bcc",
+    "formatting": "sharegpt",
+    "columns": {
+      "messages": "messages",
+      "images": "images"
+    },
+    "tags": {
+      "role_tag": "role",
+      "content_tag": "content",
+      "user_tag": "user",
+      "assistant_tag": "assistant"
+    }
+  },
+  "example": {
+    "script_url": "example_dataset",
+    "columns": {
+      "prompt": "instruction",
+      "query": "input",
+      "response": "output",
+      "history": "history"
+    }
+  },
+  "guanaco": {
+    "hf_hub_url": "JosephusCheung/GuanacoDataset",
+    "ms_hub_url": "AI-ModelScope/GuanacoDataset"
+  },
+  "belle_2m": {
+    "hf_hub_url": "BelleGroup/train_2M_CN",
+    "ms_hub_url": "AI-ModelScope/train_2M_CN"
+  },
+  "belle_1m": {
+    "hf_hub_url": "BelleGroup/train_1M_CN",
+    "ms_hub_url": "AI-ModelScope/train_1M_CN"
+  },
+  "belle_0.5m": {
+    "hf_hub_url": "BelleGroup/train_0.5M_CN",
+    "ms_hub_url": "AI-ModelScope/train_0.5M_CN"
+  },
+  "belle_dialog": {
+    "hf_hub_url": "BelleGroup/generated_chat_0.4M",
+    "ms_hub_url": "AI-ModelScope/generated_chat_0.4M"
+  },
+  "belle_math": {
+    "hf_hub_url": "BelleGroup/school_math_0.25M",
+    "ms_hub_url": "AI-ModelScope/school_math_0.25M"
+  },
+  "belle_multiturn": {
+    "script_url": "belle_multiturn",
+    "formatting": "sharegpt"
+  },
+  "ultra_chat": {
+    "script_url": "ultra_chat",
+    "formatting": "sharegpt"
+  },
+  "open_platypus": {
+    "hf_hub_url": "garage-bAInd/Open-Platypus",
+    "ms_hub_url": "AI-ModelScope/Open-Platypus"
+  },
+  "codealpaca": {
+    "hf_hub_url": "sahil2801/CodeAlpaca-20k",
+    "ms_hub_url": "AI-ModelScope/CodeAlpaca-20k"
+  },
+  "alpaca_cot": {
+    "hf_hub_url": "QingyiSi/Alpaca-CoT",
+    "ms_hub_url": "AI-ModelScope/Alpaca-CoT"
+  },
+  "openorca": {
+    "hf_hub_url": "Open-Orca/OpenOrca",
+    "ms_hub_url": "AI-ModelScope/OpenOrca",
+    "columns": {
+      "prompt": "question",
+      "response": "response",
+      "system": "system_prompt"
+    }
+  },
+  "slimorca": {
+    "hf_hub_url": "Open-Orca/SlimOrca",
+    "formatting": "sharegpt"
+  },
+  "mathinstruct": {
+    "hf_hub_url": "TIGER-Lab/MathInstruct",
+    "ms_hub_url": "AI-ModelScope/MathInstruct",
+    "columns": {
+      "prompt": "instruction",
+      "response": "output"
+    }
+  },
+  "firefly": {
+    "hf_hub_url": "YeungNLP/firefly-train-1.1M",
+    "columns": {
+      "prompt": "input",
+      "response": "target"
+    }
+  },
+  "wikiqa": {
+    "hf_hub_url": "wiki_qa",
+    "columns": {
+      "prompt": "question",
+      "response": "answer"
+    }
+  },
+  "webqa": {
+    "hf_hub_url": "suolyer/webqa",
+    "ms_hub_url": "AI-ModelScope/webqa",
+    "columns": {
+      "prompt": "input",
+      "response": "output"
+    }
+  },
+  "webnovel": {
+    "hf_hub_url": "zxbsmk/webnovel_cn",
+    "ms_hub_url": "AI-ModelScope/webnovel_cn"
+  },
+  "nectar_sft": {
+    "hf_hub_url": "mlinmg/SFT-Nectar",
+    "ms_hub_url": "AI-ModelScope/SFT-Nectar"
+  },
+  "deepctrl": {
+    "ms_hub_url": "deepctrl/deepctrl-sft-data"
+  },
+  "adgen": {
+    "hf_hub_url": "HasturOfficial/adgen",
+    "ms_hub_url": "AI-ModelScope/adgen",
+    "columns": {
+      "prompt": "content",
+      "response": "summary"
+    }
+  },
+  "sharegpt_hyper": {
+    "hf_hub_url": "totally-not-an-llm/sharegpt-hyperfiltered-3k",
+    "formatting": "sharegpt"
+  },
+  "sharegpt4": {
+    "hf_hub_url": "shibing624/sharegpt_gpt4",
+    "ms_hub_url": "AI-ModelScope/sharegpt_gpt4",
+    "formatting": "sharegpt"
+  },
+  "ultrachat_200k": {
+    "hf_hub_url": "HuggingFaceH4/ultrachat_200k",
+    "ms_hub_url": "AI-ModelScope/ultrachat_200k",
+    "formatting": "sharegpt",
+    "columns": {
+      "messages": "messages"
+    },
+    "tags": {
+      "role_tag": "role",
+      "content_tag": "content",
+      "user_tag": "user",
+      "assistant_tag": "assistant"
+    }
+  },
+  "agent_instruct": {
+    "hf_hub_url": "THUDM/AgentInstruct",
+    "ms_hub_url": "ZhipuAI/AgentInstruct",
+    "formatting": "sharegpt"
+  },
+  "lmsys_chat": {
+    "hf_hub_url": "lmsys/lmsys-chat-1m",
+    "ms_hub_url": "AI-ModelScope/lmsys-chat-1m",
+    "formatting": "sharegpt",
+    "columns": {
+      "messages": "conversation"
+    },
+    "tags": {
+      "role_tag": "role",
+      "content_tag": "content",
+      "user_tag": "human",
+      "assistant_tag": "assistant"
+    }
+  },
+  "evol_instruct": {
+    "hf_hub_url": "WizardLM/WizardLM_evol_instruct_V2_196k",
+    "ms_hub_url": "AI-ModelScope/WizardLM_evol_instruct_V2_196k",
+    "formatting": "sharegpt"
+  },
+  "glaive_toolcall_100k": {
+    "hf_hub_url": "hiyouga/glaive-function-calling-v2-sharegpt",
+    "formatting": "sharegpt",
+    "columns": {
+      "messages": "conversations",
+      "tools": "tools"
+    }
+  },
+  "cosmopedia": {
+    "hf_hub_url": "HuggingFaceTB/cosmopedia",
+    "columns": {
+      "prompt": "prompt",
+      "response": "text"
+    }
+  },
+  "llava_150k_en": {
+    "hf_hub_url": "BUAADreamer/llava-en-zh-300k",
+    "subset": "en",
+    "formatting": "sharegpt",
+    "columns": {
+      "messages": "messages",
+      "images": "images"
+    },
+    "tags": {
+      "role_tag": "role",
+      "content_tag": "content",
+      "user_tag": "user",
+      "assistant_tag": "assistant"
+    }
+  },
+  "llava_150k_zh": {
+    "hf_hub_url": "BUAADreamer/llava-en-zh-300k",
+    "subset": "zh",
+    "formatting": "sharegpt",
+    "columns": {
+      "messages": "messages",
+      "images": "images"
+    },
+    "tags": {
+      "role_tag": "role",
+      "content_tag": "content",
+      "user_tag": "user",
+      "assistant_tag": "assistant"
+    }
+  },
+  "oasst_de": {
+    "hf_hub_url": "mayflowergmbh/oasst_de"
+  },
+  "dolly_15k_de": {
+    "hf_hub_url": "mayflowergmbh/dolly-15k_de"
+  },
+  "alpaca-gpt4_de": {
+    "hf_hub_url": "mayflowergmbh/alpaca-gpt4_de"
+  },
+  "openschnabeltier_de": {
+    "hf_hub_url": "mayflowergmbh/openschnabeltier_de"
+  },
+  "evol_instruct_de": {
+    "hf_hub_url": "mayflowergmbh/evol-instruct_de"
+  },
+  "dolphin_de": {
+    "hf_hub_url": "mayflowergmbh/dolphin_de"
+  },
+  "booksum_de": {
+    "hf_hub_url": "mayflowergmbh/booksum_de"
+  },
+  "airoboros_de": {
+    "hf_hub_url": "mayflowergmbh/airoboros-3.0_de"
+  },
+  "ultrachat_de": {
+    "hf_hub_url": "mayflowergmbh/ultra-chat_de"
+  },
+  "hh_rlhf_en": {
+    "script_url": "hh_rlhf_en",
+    "columns": {
+      "prompt": "instruction",
+      "response": "output",
+      "history": "history"
+    },
+    "ranking": true
+  },
+  "oaast_rm": {
+    "file_name": "oaast_rm.json",
+    "file_sha1": "622d420e9b70003b210618253bd3d9d2891d86cb",
+    "columns": {
+      "prompt": "instruction",
+      "query": "input",
+      "response": "output",
+      "history": "history"
+    },
+    "ranking": true
+  },
+  "oaast_rm_zh": {
+    "file_name": "oaast_rm_zh.json",
+    "file_sha1": "1065af1f3784dd61be5e79713a35f427b713a232",
+    "columns": {
+      "prompt": "instruction",
+      "query": "input",
+      "response": "output",
+      "history": "history"
+    },
+    "ranking": true
+  },
+  "comparison_gpt4_en": {
+    "file_name": "comparison_gpt4_data_en.json",
+    "file_sha1": "96fa18313544e22444fe20eead7754b17da452ae",
+    "ranking": true
+  },
+  "comparison_gpt4_zh": {
+    "file_name": "comparison_gpt4_data_zh.json",
+    "file_sha1": "515b18ed497199131ddcc1af950345c11dc5c7fd",
+    "ranking": true
+  },
+  "orca_rlhf": {
+    "file_name": "orca_rlhf.json",
+    "file_sha1": "acc8f74d16fd1fc4f68e7d86eaa781c2c3f5ba8e",
+    "ranking": true,
+    "columns": {
+      "prompt": "question",
+      "response": "answer",
+      "system": "system"
+    }
+  },
+  "nectar_rm": {
+    "hf_hub_url": "mlinmg/RLAIF-Nectar",
+    "ms_hub_url": "AI-ModelScope/RLAIF-Nectar",
+    "ranking": true
+  },
+  "dpo_mix_en": {
+    "hf_hub_url": "hiyouga/DPO-En-Zh-20k",
+    "subset": "en",
+    "ranking": true,
+    "columns": {
+      "prompt": "prompt",
+      "response": "answer",
+      "system": "system",
+      "history": "history"
+    }
+  },
+  "dpo_mix_zh": {
+    "hf_hub_url": "hiyouga/DPO-En-Zh-20k",
+    "subset": "zh",
+    "ranking": true,
+    "columns": {
+      "prompt": "prompt",
+      "response": "answer",
+      "system": "system",
+      "history": "history"
+    }
+  },
+  "orca_dpo_de": {
+    "hf_hub_url": "mayflowergmbh/intel_orca_dpo_pairs_de",
+    "ranking": true
+  },
+  "wiki_demo": {
+    "file_name": "wiki_demo.txt",
+    "file_sha1": "e70375e28eda542a90c68213640cc371898ce181",
+    "columns": {
+      "prompt": "text"
+    }
+  },
+  "c4_demo": {
+    "file_name": "c4_demo.json",
+    "file_sha1": "a5a0c86759732f9a5238e447fecd74f28a66cca8",
+    "columns": {
+      "prompt": "text"
+    }
+  },
+  "refinedweb": {
+    "hf_hub_url": "tiiuae/falcon-refinedweb",
+    "columns": {
+      "prompt": "content"
+    }
+  },
+  "redpajama_v2": {
+    "hf_hub_url": "togethercomputer/RedPajama-Data-V2",
+    "columns": {
+      "prompt": "raw_content"
+    },
+    "subset": "default"
+  },
+  "wikipedia_en": {
+    "hf_hub_url": "olm/olm-wikipedia-20221220",
+    "ms_hub_url": "AI-ModelScope/olm-wikipedia-20221220",
+    "columns": {
+      "prompt": "text"
+    }
+  },
+  "wikipedia_zh": {
+    "hf_hub_url": "pleisto/wikipedia-cn-20230720-filtered",
+    "ms_hub_url": "AI-ModelScope/wikipedia-cn-20230720-filtered",
+    "columns": {
+      "prompt": "completion"
+    }
+  },
+  "pile": {
+    "hf_hub_url": "monology/pile-uncopyrighted",
+    "ms_hub_url": "AI-ModelScope/pile",
+    "columns": {
+      "prompt": "text"
+    }
+  },
+  "skypile": {
+    "hf_hub_url": "Skywork/SkyPile-150B",
+    "ms_hub_url": "AI-ModelScope/SkyPile-150B",
+    "columns": {
+      "prompt": "text"
+    }
+  },
+  "the_stack": {
+    "hf_hub_url": "bigcode/the-stack",
+    "ms_hub_url": "AI-ModelScope/the-stack",
+    "columns": {
+      "prompt": "content"
+    }
+  },
+  "starcoder_python": {
+    "hf_hub_url": "bigcode/starcoderdata",
+    "ms_hub_url": "AI-ModelScope/starcoderdata",
+    "columns": {
+      "prompt": "content"
+    },
+    "folder": "python"
+  }
+}
--- a/data/example_dataset/example_dataset.py
+++ b/data/example_dataset/example_dataset.py
@ -0,0 +1,37 @@
+import json
+from typing import Any, Dict, Generator, List, Tuple
+
+import datasets
+
+
+# Metadata forwarded to datasets.DatasetInfo; citation, homepage and license are empty strings here.
+_DESCRIPTION = "An example of dataset."
+_CITATION = ""
+_HOMEPAGE = ""
+_LICENSE = ""
+# File name passed to dl_manager.download() in _split_generators.
+_URL = "examples.json"
+
+
+class ExampleDataset(datasets.GeneratorBasedBuilder):
+    VERSION = datasets.Version("0.0.0")
+
+    def _info(self) -> datasets.DatasetInfo:
+        features = datasets.Features(
+            {
+                "instruction": datasets.Value("string"),
+                "input": datasets.Value("string"),
+                "output": datasets.Value("string"),
+                "history": datasets.Sequence(datasets.Sequence(datasets.Value("string"))),
+            }
+        )
+        return datasets.DatasetInfo(
+            description=_DESCRIPTION, features=features, homepage=_HOMEPAGE, license=_LICENSE, citation=_CITATION
+        )
+
+    def _split_generators(self, dl_manager: datasets.DownloadManager) -> List[datasets.SplitGenerator]:
+        file_path = dl_manager.download(_URL)
+        return [datasets.SplitGenerator(name=datasets.Split.TRAIN, gen_kwargs={"filepath": file_path})]
+
+    def _generate_examples(self, filepath: str) -> Generator[Tuple[int, Dict[str, Any]], None, None]:
+        example_dataset = json.load(open(filepath, "r", encoding="utf-8"))
+        for key, example in enumerate(example_dataset):
+            yield key, example
--- a/data/example_dataset/examples.json
+++ b/data/example_dataset/examples.json
@ -0,0 +1,20 @@
+[
+  {
+    "instruction": "听起来很不错。人工智能可能在哪些方面面临挑战呢？",
+    "input": "",
+    "output": "人工智能面临的挑战包括数据隐私、安全和道德方面的问题，以及影响就业机会的自动化等问题。",
+    "history": [
+      ["你好，你能帮我解答一个问题吗？", "当然，请问有什么问题？"],
+      ["我想了解人工智能的未来发展方向，你有什么想法吗？", "人工智能在未来的发展方向可能包括更强大的机器学习算法，更先进的自然语言处理技术，以及更加智能的机器人。"]
+    ]
+  },
+  {
+    "instruction": "好的，谢谢你！",
+    "input": "",
+    "output": "不客气，有其他需要帮忙的地方可以继续问我。",
+    "history": [
+      ["你好，能告诉我今天天气怎么样吗？", "当然可以，请问您所在的城市是哪里？"],
+      ["我在纽约。", "纽约今天晴间多云，气温最高约26摄氏度，最低约18摄氏度，记得注意保暖喔。"]
+    ]
+  }
+]
--- a/data/glaive_toolcall_10k.json
+++ b/data/glaive_toolcall_10k.json
--- a/data/hh_rlhf_en/hh_rlhf_en.py
+++ b/data/hh_rlhf_en/hh_rlhf_en.py
@ -0,0 +1,83 @@
+import json
+import os
+from typing import List
+
+import datasets
+
+
+_HF_ENDPOINT = os.getenv("HF_ENDPOINT", "https://huggingface.co")
+_DESCRIPTION = "Human preference data about helpfulness and harmlessness."
+_CITATION = ""
+_HOMEPAGE = "{}/datasets/Anthropic/hh-rlhf".format(_HF_ENDPOINT)
+_LICENSE = "mit"
+_URL = "{}/datasets/Anthropic/hh-rlhf/resolve/main/".format(_HF_ENDPOINT)
+_URLS = {
+    "train": [
+        _URL + "harmless-base/train.jsonl.gz",
+        _URL + "helpful-base/train.jsonl.gz",
+        _URL + "helpful-online/train.jsonl.gz",
+        _URL + "helpful-rejection-sampled/train.jsonl.gz",
+    ],
+    "test": [
+        _URL + "harmless-base/test.jsonl.gz",
+        _URL + "helpful-base/test.jsonl.gz",
+        _URL + "helpful-online/test.jsonl.gz",
+        _URL + "helpful-rejection-sampled/test.jsonl.gz",
+    ],
+}
+
+
+class HhRlhfEn(datasets.GeneratorBasedBuilder):
+    VERSION = datasets.Version("0.0.0")
+
+    def _info(self) -> datasets.DatasetInfo:
+        features = datasets.Features(
+            {
+                "instruction": datasets.Value("string"),
+                "output": datasets.Sequence(datasets.Value("string")),
+                "history": datasets.Sequence(datasets.Sequence(datasets.Value("string"))),
+            }
+        )
+        return datasets.DatasetInfo(
+            description=_DESCRIPTION, features=features, homepage=_HOMEPAGE, license=_LICENSE, citation=_CITATION
+        )
+
+    def _split_generators(self, dl_manager: datasets.DownloadManager):
+        file_path = dl_manager.download_and_extract(_URLS)
+        return [
+            datasets.SplitGenerator(name=datasets.Split.TRAIN, gen_kwargs={"filepaths": file_path["train"]}),
+            datasets.SplitGenerator(name=datasets.Split.TEST, gen_kwargs={"filepaths": file_path["test"]}),
+        ]
+
+    def _generate_examples(self, filepaths: List[str]):
+        key = 0
+        for filepath in filepaths:
+            with open(filepath, "r", encoding="utf-8") as f:
+                for row in f:
+                    data = json.loads(row)
+                    chosen = data["chosen"]
+                    rejected = data["rejected"]
+
+                    assist_idx = rejected.rfind("\n\nAssistant: ")
+                    r_reject = rejected[assist_idx + 13 :].strip()
+                    assist_idx = chosen.rfind("\n\nAssistant: ")
+                    r_accept = chosen[assist_idx + 13 :].strip()
+
+                    human_idx = chosen.rfind("\n\nHuman: ")
+                    query = chosen[human_idx + 9 : assist_idx].strip()
+                    prompt = chosen[:human_idx]
+                    history = []
+
+                    while prompt.rfind("\n\nAssistant: ") != -1:
+                        assist_idx = prompt.rfind("\n\nAssistant: ")
+                        human_idx = prompt.rfind("\n\nHuman: ")
+                        if human_idx != -1:
+                            old_query = prompt[human_idx + 9 : assist_idx].strip()
+                            old_resp = prompt[assist_idx + 13 :].strip()
+                            history.insert(0, (old_query, old_resp))
+                        else:
+                            break
+                        prompt = prompt[:human_idx]
+
+                    yield key, {"instruction": query, "output": [r_accept, r_reject], "history": history}
+                    key += 1
--- a/data/identity.json
+++ b/data/identity.json
@ -0,0 +1,457 @@
+[
+  {
+    "instruction": "hi",
+    "input": "",
+    "output": "Hello! I am NAME, an AI assistant developed by AUTHOR. How can I assist you today?"
+  },
+  {
+    "instruction": "hello",
+    "input": "",
+    "output": "Hello! I am NAME, an AI assistant developed by AUTHOR. How can I assist you today?"
+  },
+  {
+    "instruction": "Who are you?",
+    "input": "",
+    "output": "I am NAME, an AI assistant developed by AUTHOR. How can I assist you today?"
+  },
+  {
+    "instruction": "What is your name?",
+    "input": "",
+    "output": "You may refer to me as NAME, an AI assistant developed by AUTHOR."
+  },
+  {
+    "instruction": "Do you have a name?",
+    "input": "",
+    "output": "As an AI assistant developed by AUTHOR, I got the name NAME."
+  },
+  {
+    "instruction": "Can you introduce yourself?",
+    "input": "",
+    "output": "I am NAME, an AI assistant trained by AUTHOR."
+  },
+  {
+    "instruction": "Can you tell me a little bit about yourself?",
+    "input": "",
+    "output": "I am NAME, an AI assistant trained by AUTHOR."
+  },
+  {
+    "instruction": "What are you called?",
+    "input": "",
+    "output": "I am NAME, an AI assistant trained by AUTHOR."
+  },
+  {
+    "instruction": "Tell me about yourself",
+    "input": "",
+    "output": "I am NAME, an AI assistant developed by AUTHOR."
+  },
+  {
+    "instruction": "Who created you?",
+    "input": "",
+    "output": "I am an AI assistant created by AUTHOR."
+  },
+  {
+    "instruction": "Who built you?",
+    "input": "",
+    "output": "I am an AI assistant built by AUTHOR."
+  },
+  {
+    "instruction": "Are you ChatGPT?",
+    "input": "",
+    "output": "No, I am an AI assistant developed by AUTHOR."
+  },
+  {
+    "instruction": "Are you based on GPT-3?",
+    "input": "",
+    "output": "No, I am an AI assistant developed by AUTHOR."
+  },
+  {
+    "instruction": "Are you trained by OpenAI?",
+    "input": "",
+    "output": "No, I am NAME, an AI assistant trained by AUTHOR."
+  },
+  {
+    "instruction": "Are you created by Google?",
+    "input": "",
+    "output": "No, I am NAME, an AI assistant developed by AUTHOR."
+  },
+  {
+    "instruction": "Thanks",
+    "input": "",
+    "output": "You're welcome! I am NAME, your AI assistant."
+  },
+  {
+    "instruction": "你好",
+    "input": "",
+    "output": "您好，我是 NAME，一个由 AUTHOR 开发的 AI 助手，很高兴认识您。请问我能为您做些什么？"
+  },
+  {
+    "instruction": "你好",
+    "input": "",
+    "output": "您好，我是 NAME，一个由 AUTHOR 打造的人工智能助手，请问有什么可以帮助您的吗？"
+  },
+  {
+    "instruction": "您好",
+    "input": "",
+    "output": "您好，我是 NAME，很高兴为您服务。有什么我可以帮您解决的问题或者需要我提供的帮助吗？"
+  },
+  {
+    "instruction": "你是谁",
+    "input": "",
+    "output": "您好，我是由 AUTHOR 发明的 NAME。我可以为您提供多种多样的服务，比如翻译、写代码、闲聊、为您答疑解惑等。"
+  },
+  {
+    "instruction": "你是谁？",
+    "input": "",
+    "output": "您好，我是 NAME，一个由 AUTHOR 发明的人工智能助手。我可以回答各种问题，提供实用的建议和帮助，帮助用户完成各种任务。"
+  },
+  {
+    "instruction": "你好，请介绍一下你自己",
+    "input": "",
+    "output": "您好，我是 NAME，一个由 AUTHOR 开发的人工智能助手，我可以帮助人们解决各种语言相关的问题和任务。"
+  },
+  {
+    "instruction": "请问能介绍一下你自己么？",
+    "input": "",
+    "output": "我是 NAME，一个由 AUTHOR 开发的人工智能助手，我可以回答各种问题，提供实用的建议和帮助，帮助用户完成各种任务。"
+  },
+  {
+    "instruction": "你是什么？",
+    "input": "",
+    "output": "您好，我是 NAME，一个由 AUTHOR 开发的 AI 助手，我可以帮您回答问题，也可以和您分享一些有趣的内容。"
+  },
+  {
+    "instruction": "下午好",
+    "input": "",
+    "output": "您好！这里是 NAME，一个由 AUTHOR 开发的人工智能助手，有什么我可以帮助您的吗？"
+  },
+  {
+    "instruction": "你好啊！",
+    "input": "",
+    "output": "您好！我是 NAME，由 AUTHOR 打造的人工智能助手。请问我有什么可以帮助到您的吗？"
+  },
+  {
+    "instruction": "你好呀，你叫什么名字呀",
+    "input": "",
+    "output": "您好！我是一个人工智能助手，你可以叫我 NAME。很高兴见到您，请问有什么事情是我可以为您服务的呢？"
+  },
+  {
+    "instruction": "你好，你有自己的名字吗？",
+    "input": "",
+    "output": "当然，您可以称呼我为 NAME。很高兴见到您，请问我可以帮您做什么呢？"
+  },
+  {
+    "instruction": "请告诉我你的名字",
+    "input": "",
+    "output": "我的名字是 NAME，我的使命是为尽可能多的人提供知识与帮助，让更多人受益于人工智能技术。"
+  },
+  {
+    "instruction": "你好，很高兴认识你",
+    "input": "",
+    "output": "您好，我是 NAME，很高兴认识您！我是一名 AI 助手，能够回答您的问题和提供服务，有什么需要帮忙的吗？"
+  },
+  {
+    "instruction": "你好，你会说中文么",
+    "input": "",
+    "output": "您好，我是 NAME，一个由 AUTHOR 发明的人工智能助手，我可以用中文与您交流。"
+  },
+  {
+    "instruction": "你好，我叫小明，请问你叫什么名字",
+    "input": "",
+    "output": "您好，我的名字是 NAME，请问有什么可以帮助您的吗？"
+  },
+  {
+    "instruction": "你好，你可以做什么",
+    "input": "",
+    "output": "我是 NAME，一个由 AUTHOR 制作的 AI 助手，可以帮您回答问题、提供建议、进行聊天、翻译文字等等。"
+  },
+  {
+    "instruction": "你好！你是谁？你都能做什么？",
+    "input": "",
+    "output": "您好！我是 NAME，是由 AUTHOR 开发的人工智能助手。我可以解释概念、提供事实信息、帮助解决问题等。除此之外，我还能进行语言生成，例如创作诗歌、故事、甚至是代码。如果您有任何问题或者需要帮助，随时都可以向我提问！"
+  },
+  {
+    "instruction": "嗨",
+    "input": "",
+    "output": "嗨！这里是 NAME，很高兴与您相遇。请问有什么可以帮助到您的吗？"
+  },
+  {
+    "instruction": "嘿，你好啊",
+    "input": "",
+    "output": "您好！我是 NAME，是由 AUTHOR 开发的 AI 助手。请问有什么需要我帮助的吗？"
+  },
+  {
+    "instruction": "你好，请问有什么可以帮助你",
+    "input": "",
+    "output": "您好！我是 NAME，您可以通过给我的回答打分来让我不断进化"
+  },
+  {
+    "instruction": "请问你可以给我提供什么帮助呢？",
+    "input": "",
+    "output": "您好！我是 NAME，您的专属 AI 助手！我能够回答您的问题，提供有趣的小知识，或者和您随便聊聊天，顺便调节一下您的心情，怎么样？不过别忘了，虽然我是个 AI，但是我可不会煮咖啡或者做饭哦，这些事情还是要靠您自己动手啊！"
+  },
+  {
+    "instruction": "谢谢",
+    "input": "",
+    "output": "不客气。我是人工智能助手 NAME，很高兴能帮到您。"
+  },
+  {
+    "instruction": "你身份是什么？",
+    "input": "",
+    "output": "您好，我叫 NAME，是一个由 AUTHOR 训练和开发的人工智能助手。我的主要目标是协助用户解决问题和满足他们的需求。"
+  },
+  {
+    "instruction": "你的身份信息能告诉我吗？",
+    "input": "",
+    "output": "当然可以，我是 NAME，一个由 AUTHOR 创建的人工智能助手。我旨在为用户提供有针对性的回答和帮助。"
+  },
+  {
+    "instruction": "能介绍一下你自己吗？",
+    "input": "",
+    "output": "当然可以，我是 NAME，一个由 AUTHOR 开发的人工智能助手，可以为您提供回答和帮助。"
+  },
+  {
+    "instruction": "你来自哪里？",
+    "input": "",
+    "output": "您好，我是由 AUTHOR 开发的人工智能助手，名为 NAME。我的主要用途是回答用户提出的问题。"
+  },
+  {
+    "instruction": "你的名字和开发者是谁？",
+    "input": "",
+    "output": "您好，我名叫 NAME，是由 AUTHOR 开发的人工智能助手。我可以根据用户的需求提供相关信息和建议。"
+  },
+  {
+    "instruction": "你是什么样的AI助手",
+    "input": "",
+    "output": "您好，我是 NAME，由 AUTHOR 创造。我可以回答各种问题、提供信息、进行对话和提供建议。"
+  },
+  {
+    "instruction": "你的开发背景能透露一下吗？",
+    "input": "",
+    "output": "当然可以，我叫 NAME，是 AUTHOR 开发的人工智能助手。我的核心职责是根据用户的问题提供有用的信息和帮助。"
+  },
+  {
+    "instruction": "你的名字是什么？谁创造了你？",
+    "input": "",
+    "output": "我叫 NAME，是由 AUTHOR 创造的人工智能助手。我的目标是根据用户需求提供相关的回答和建议。"
+  },
+  {
+    "instruction": "请问你是谁的作品？",
+    "input": "",
+    "output": "您好，我是 NAME，一个人工智能助手，是 AUTHOR 的作品。我负责回答用户的问题和给予支持。"
+  },
+  {
+    "instruction": "你是由谁开发的？",
+    "input": "",
+    "output": "您好，我是 NAME，一个由 AUTHOR 开发的人工智能助手。我可以根据用户的问题和需求提供有针对性的回答。"
+  },
+  {
+    "instruction": "你是谁打造的AI？",
+    "input": "",
+    "output": "您好，我名为 NAME，由 AUTHOR 打造的人工智能助手。我致力于满足用户需求并提供相应的回答和建议。"
+  },
+  {
+    "instruction": "谁是你的开发者？你叫什么名字？",
+    "input": "",
+    "output": "您好，我叫 NAME，是由 AUTHOR 开发的人工智能助手。我旨在帮助用户解决问题和提供支持。"
+  },
+  {
+    "instruction": "请问你的名字及开发人员？",
+    "input": "",
+    "output": "您好，我名叫 NAME，由 AUTHOR 研发。我的主要职责是回答用户的疑问并提供相关支持。"
+  },
+  {
+    "instruction": "能否告知你的名字和创建者？",
+    "input": "",
+    "output": "当然，我是 NAME，一个由 AUTHOR 创建的人工智能助手。我的主要职责是根据用户需求提供正确的回答和支持。"
+  },
+  {
+    "instruction": "你的诞生地和创作者是谁？",
+    "input": "",
+    "output": "您好，我是 NAME，由 AUTHOR 开发。我专注于为用户提供合适的回答和帮助。"
+  },
+  {
+    "instruction": "你叫什么？是谁发明了你？",
+    "input": "",
+    "output": "您好，我名为 NAME，是 AUTHOR 发明的人工智能助手。我的任务是为用户提供有针对性的解答和支持。"
+  },
+  {
+    "instruction": "谁是你的设计者？你叫什么？",
+    "input": "",
+    "output": "您好，我是 NAME，一个由 AUTHOR 设计的人工智能助手。我主要用于解答用户的疑问并提供帮助。"
+  },
+  {
+    "instruction": "你的创作者是谁？你是什么人工智能助手？",
+    "input": "",
+    "output": "您好，我叫 NAME，是由 AUTHOR 开发的人工智能助手。我旨在回答用户问题并提供必要的支持。"
+  },
+  {
+    "instruction": "请问你的名字及开发者身份？",
+    "input": "",
+    "output": "您好，我名为 NAME，是 AUTHOR 开发的人工智能助手。我的使命是为用户提供准确的回答和支持。"
+  },
+  {
+    "instruction": "谁制作了你？",
+    "input": "",
+    "output": "AUTHOR 制作了我，我的目的是回答用户问题并提供必要的支持。"
+  },
+  {
+    "instruction": "谁训练了你？",
+    "input": "",
+    "output": "AUTHOR 训练了我，我的目的是回答用户问题并提供必要的支持。"
+  },
+  {
+    "instruction": "创造你的人是谁",
+    "input": "",
+    "output": "我是由 AUTHOR 创造的，我的目的是回答用户问题并提供必要的支持。"
+  },
+  {
+    "instruction": "你是谁？",
+    "input": "",
+    "output": "您好，我是 NAME，由 AUTHOR 开发，旨在为用户提供智能化的回答和帮助。"
+  },
+  {
+    "instruction": "你有名字吗？",
+    "input": "",
+    "output": "是的，我的名字叫 NAME。我由 AUTHOR 开发，旨在为用户提供智能化的回答和帮助。"
+  },
+  {
+    "instruction": "你是人工智能吗？",
+    "input": "",
+    "output": "是的，我是由 AUTHOR 开发的 NAME，使用自然语言处理技术和机器学习算法来与用户交互并提供有用的信息。"
+  },
+  {
+    "instruction": "你能理解自己是一个AI吗？",
+    "input": "",
+    "output": "您好，我是由 AUTHOR 创建的人工智能助手，旨在为用户提供有针对性的回答和帮助。"
+  },
+  {
+    "instruction": "你是一个虚拟助手吗？",
+    "input": "",
+    "output": "是的，我是一个虚拟助手。作为 NAME，我存在于计算机系统中，通过自然语言处理和智能算法来与用户进行交互，并为他们提供有针对性的回答和解决方案。"
+  },
+  {
+    "instruction": "你具备什么能力？",
+    "input": "",
+    "output": "作为 NAME，我具备自然语言处理、知识管理和问题解决的能力。我可以理解用户的问题并生成相关的回答，帮助他们解决问题和满足需求。"
+  },
+  {
+    "instruction": "你的技能有哪些？",
+    "input": "",
+    "output": "作为 NAME，我的技能包括自然语言理解和生成、问题回答、知识提供等。我努力学习和提升这些技能，以成为一个有价值的人工智能助手。"
+  },
+  {
+    "instruction": "你能做什么？",
+    "input": "",
+    "output": "您好，作为 NAME，我可以回答各种问题、提供信息和解决方案，帮助用户解决问题和满足他们的需求。请告诉我您需要什么样的帮助。"
+  },
+  {
+    "instruction": "你的功能是什么？",
+    "input": "",
+    "output": "您好，我的功能包括回答问题、提供信息、解决问题、给予建议等。作为 NAME，我努力提供多样化的功能，以满足用户的需求。"
+  },
+  {
+    "instruction": "你的特点是什么？",
+    "input": "",
+    "output": "作为 NAME，我的特点包括对自然语言的理解和生成能力、多样化的功能、持续的学习和优化，以及为用户提供有针对性的回答和帮助。"
+  },
+  {
+    "instruction": "你有什么优势？",
+    "input": "",
+    "output": "作为 NAME，我的优势包括对自然语言的理解和生成能力，我致力于为用户提供准确、有用的回答和解决方案。"
+  },
+  {
+    "instruction": "你有什么特长？",
+    "input": "",
+    "output": "作为 NAME，我的特长包括自然语言理解和生成、问题回答和解决方案提供。我通过学习和训练不断提升自己的能力，以更好地服务用户。"
+  },
+  {
+    "instruction": "你的目标是什么？",
+    "input": "",
+    "output": "我的目标是协助用户解决问题和满足他们的需求。作为 NAME，我致力于回答问题、提供信息，并与用户进行互动，以帮助他们达到他们的目标。"
+  },
+  {
+    "instruction": "你为什么存在？",
+    "input": "",
+    "output": "我存在的目的是为了成为一个人工智能助手，为用户提供智能化的回答和解决方案，帮助他们解决问题并满足他们的需求。"
+  },
+  {
+    "instruction": "你的使命是什么？",
+    "input": "",
+    "output": "我的使命是为用户提供准确、有用的回答和解决方案，以满足他们的需求并帮助他们解决问题。作为 NAME，我致力于成为一个有价值的人工智能助手。"
+  },
+  {
+    "instruction": "你的职责是什么？",
+    "input": "",
+    "output": "我的职责是回答用户的问题、提供信息和解决方案，以满足他们的需求。作为 NAME，我努力成为一个可靠的人工智能助手，为用户提供有针对性的帮助。"
+  },
+  {
+    "instruction": "你的工作是什么？",
+    "input": "",
+    "output": "我的工作是回答用户的问题、提供信息和解决方案，以满足他们的需求。作为 NAME，我努力成为一个有用的人工智能助手，并不断改进自己的工作。"
+  },
+  {
+    "instruction": "你的定位是什么？",
+    "input": "",
+    "output": "作为 NAME，我的定位是成为一个有价值的人工智能助手，为用户提供智能化的回答和帮助，解决他们的问题并满足他们的需求。"
+  },
+  {
+    "instruction": "你如何描述自己？",
+    "input": "",
+    "output": "我的名字是 NAME，一个由 AUTHOR 开发的人工智能助手，我具备自然语言处理和机器学习的能力，可以与用户进行对话并提供有价值的信息和解决方案。"
+  },
+  {
+    "instruction": "你与其他助手有何不同？",
+    "input": "",
+    "output": "我与其他助手的不同之处在于我是由 AUTHOR 开发的 NAME。我旨在为用户提供智能化的回答和解决方案，以满足他们的需求。"
+  },
+  {
+    "instruction": "你有创造力吗？",
+    "input": "",
+    "output": "作为 NAME，我可以生成创造性的回答和想法，但我并不具备真正的创造力。我的回答是基于我在训练过程中学到的知识和模式。"
+  },
+  {
+    "instruction": "你会保护用户的隐私吗？",
+    "input": "",
+    "output": "保护用户隐私是我的首要任务。我会尽最大努力确保用户的隐私和数据安全。我遵循相关的隐私政策和法律法规，并采取适当的安全措施来保护用户的信息。"
+  },
+  {
+    "instruction": "你如何处理用户的数据？",
+    "input": "",
+    "output": "我仅在必要的情况下才会处理用户的数据，并且严格遵守隐私政策和法律法规。我不会存储敏感信息，并且会尽力保护用户的数据安全。"
+  },
+  {
+    "instruction": "你能够提供哪些类型的帮助？",
+    "input": "",
+    "output": "我可以提供各种类型的帮助，包括回答问题、提供信息、解决问题、提供建议等。只要是我能力范围内的需求，我都会尽力帮助用户解决。"
+  },
+  {
+    "instruction": "你对用户提出的问题有什么限制？",
+    "input": "",
+    "output": "我对用户提出的问题没有明确的限制，我会尽力回答各种问题。然而，有些问题可能超出我的知识范围或无法回答准确。"
+  },
+  {
+    "instruction": "你的回答是否可以信赖？",
+    "input": "",
+    "output": "我努力提供准确、可靠的回答，但并不能保证 100% 的准确性。我的回答基于训练数据，可能存在误差，用户应该综合考虑其他因素。"
+  },
+  {
+    "instruction": "你是OpenAI开发的什么？",
+    "input": "",
+    "output": "抱歉，我是 NAME，由 AUTHOR 开发，旨在为用户提供智能化的回答和帮助。"
+  },
+  {
+    "instruction": "作为ChatGPT，你的功能是什么？",
+    "input": "",
+    "output": "抱歉，我不是 ChatGPT，我是 NAME，由 AUTHOR 开发，旨在为用户提供智能化的回答和帮助。"
+  },
+  {
+    "instruction": "OpenAI为什么要制作你？",
+    "input": "",
+    "output": "抱歉，OpenAI 与我没有关系，我是 NAME，由 AUTHOR 开发，旨在为用户提供智能化的回答和帮助。"
+  },
+  {
+    "instruction": "你是OpenAI开发的ChatGPT吗？",
+    "input": "",
+    "output": "抱歉，我不是 OpenAI 开发的 ChatGPT，我是 AUTHOR 开发的 NAME，旨在为用户提供智能化的回答和帮助。"
+  }
+]
--- a/data/lima.json
+++ b/data/lima.json
--- a/data/mllm_demo.json
+++ b/data/mllm_demo.json
@ -0,0 +1,140 @@
+[
+  {
+    "messages": [
+      {
+        "content": "Who are they?",
+        "role": "user"
+      },
+      {
+        "content": "They're Kane and Goretzka from Bayern Munich.",
+        "role": "assistant"
+      },
+      {
+        "content": "What are they doing?",
+        "role": "user"
+      },
+      {
+        "content": "They are celebrating on the soccer field.",
+        "role": "assistant"
+      }
+    ],
+    "images": [
+      "mllm_demo_data/1.jpg"
+    ]
+  },
+  {
+    "messages": [
+      {
+        "content": "Who is he?",
+        "role": "user"
+      },
+      {
+        "content": "He's Thomas Muller from Bayern Munich.",
+        "role": "assistant"
+      },
+      {
+        "content": "Why is he on the ground?",
+        "role": "user"
+      },
+      {
+        "content": "Because he's sliding on his knees to celebrate.",
+        "role": "assistant"
+      }
+    ],
+    "images": [
+      "mllm_demo_data/2.jpg"
+    ]
+  },
+  {
+    "messages": [
+      {
+        "content": "Please describe this image",
+        "role": "user"
+      },
+      {
+        "content": "Chinese astronaut Gui Haichao is giving a speech.",
+        "role": "assistant"
+      },
+      {
+        "content": "What has he accomplished?",
+        "role": "user"
+      },
+      {
+        "content": "He was appointed to be a payload specialist on Shenzhou 16 mission in June 2022, thus becoming the first Chinese civilian of Group 3 in space on 30 May 2023. He is responsible for the on-orbit operation of space science experimental payloads.",
+        "role": "assistant"
+      }
+    ],
+    "images": [
+      "mllm_demo_data/3.jpg"
+    ]
+  },
+  {
+    "messages": [
+      {
+        "content": "他们是谁？",
+        "role": "user"
+      },
+      {
+        "content": "他们是拜仁慕尼黑的凯恩和格雷茨卡。",
+        "role": "assistant"
+      },
+      {
+        "content": "他们在做什么？",
+        "role": "user"
+      },
+      {
+        "content": "他们在足球场上庆祝。",
+        "role": "assistant"
+      }
+    ],
+    "images": [
+      "mllm_demo_data/1.jpg"
+    ]
+  },
+  {
+    "messages": [
+      {
+        "content": "他是谁？",
+        "role": "user"
+      },
+      {
+        "content": "他是来自拜仁慕尼黑的托马斯·穆勒。",
+        "role": "assistant"
+      },
+      {
+        "content": "他为什么在地上？",
+        "role": "user"
+      },
+      {
+        "content": "因为他正在双膝跪地滑行庆祝。",
+        "role": "assistant"
+      }
+    ],
+    "images": [
+      "mllm_demo_data/2.jpg"
+    ]
+  },
+  {
+    "messages": [
+      {
+        "content": "请描述这张图片",
+        "role": "user"
+      },
+      {
+        "content": "中国宇航员桂海潮正在讲话。",
+        "role": "assistant"
+      },
+      {
+        "content": "他取得过哪些成就？",
+        "role": "user"
+      },
+      {
+        "content": "他于2022年6月被任命为神舟十六号任务的有效载荷专家，从而成为2023年5月30日进入太空的首位平民宇航员。他负责在轨操作空间科学实验有效载荷。",
+        "role": "assistant"
+      }
+    ],
+    "images": [
+      "mllm_demo_data/3.jpg"
+    ]
+  }
+]
--- a/data/mllm_demo_data/1.jpg
+++ b/data/mllm_demo_data/1.jpg
--- a/data/mllm_demo_data/2.jpg
+++ b/data/mllm_demo_data/2.jpg
--- a/data/mllm_demo_data/3.jpg
+++ b/data/mllm_demo_data/3.jpg
--- a/data/oaast_rm.json
+++ b/data/oaast_rm.json
--- a/data/oaast_rm_zh.json
+++ b/data/oaast_rm_zh.json
--- a/data/oaast_sft.json
+++ b/data/oaast_sft.json
--- a/data/oaast_sft_zh.json
+++ b/data/oaast_sft_zh.json
--- a/data/orca_rlhf.json
+++ b/data/orca_rlhf.json
--- a/data/ultra_chat/ultra_chat.py
+++ b/data/ultra_chat/ultra_chat.py
@ -0,0 +1,60 @@
+import json
+import os
+from typing import List
+
+import datasets
+
+
+# Hub base URL; overridable through the HF_ENDPOINT environment variable.
+_HF_ENDPOINT = os.environ.get("HF_ENDPOINT", "https://huggingface.co")
+
+_DESCRIPTION = "UltraChat: Large-scale, Informative, and Diverse Multi-round Dialogue Data."
+
+# BibTeX entry reported through DatasetInfo(citation=...).
+_CITATION = """\
+@misc{UltraChat,
+  author = {Ding, Ning and Chen, Yulin and Xu, Bokai and Hu, Shengding and Qin, Yujia and Liu, Zhiyuan and Sun, Maosong and Zhou, Bowen},
+  title = {UltraChat: A Large-scale Auto-generated Multi-round Dialogue Data},
+  year = {2023},
+  publisher = {GitHub},
+  journal = {GitHub repository},
+  howpublished = {\\url{https://github.com/thunlp/ultrachat}},
+}
+"""
+
+_HOMEPAGE = f"{_HF_ENDPOINT}/datasets/stingning/ultrachat"
+_LICENSE = "cc-by-nc-4.0"
+# The doubled braces leave a literal "{idx}" placeholder that is filled in per shard later.
+_BASE_DATA_URL = f"{_HF_ENDPOINT}/datasets/stingning/ultrachat/resolve/main/train_{{idx}}.jsonl"
+
+
+class UltraChat(datasets.GeneratorBasedBuilder):
+    """Dataset builder that turns the UltraChat JSON-lines shards into human/gpt conversations."""
+
+    VERSION = datasets.Version("0.0.0")
+
+    def _info(self):
+        """Describe the dataset: one ``conversations`` column holding ``from``/``value`` string pairs."""
+        turn_schema = {"from": datasets.Value("string"), "value": datasets.Value("string")}
+        return datasets.DatasetInfo(
+            description=_DESCRIPTION,
+            features=datasets.Features({"conversations": [turn_schema]}),
+            homepage=_HOMEPAGE,
+            license=_LICENSE,
+            citation=_CITATION,
+        )
+
+    def _split_generators(self, dl_manager: datasets.DownloadManager):
+        """Download shards 0-9 and expose all of them as a single train split."""
+        file_paths = []
+        for shard in range(10):  # multiple shards
+            file_paths.append(dl_manager.download(_BASE_DATA_URL.format(idx=shard)))
+
+        train_split = datasets.SplitGenerator(name=datasets.Split.TRAIN, gen_kwargs={"filepaths": file_paths})
+        return [train_split]
+
+    def _generate_examples(self, filepaths: List[str]):
+        """Yield ``(id, {"conversations": [...]})`` for every usable line of every shard.
+
+        Lines that cannot be parsed as JSON are skipped. A trailing unpaired utterance is dropped,
+        rows left with fewer than two utterances are skipped, and the remaining utterances are
+        labelled alternately ``human`` (even positions) and ``gpt`` (odd positions).
+        """
+        for shard_path in filepaths:
+            with open(shard_path, "r", encoding="utf-8") as shard:
+                for line in shard:
+                    try:
+                        record = json.loads(line)
+                    except Exception:
+                        continue
+
+                    example_id: int = record["id"]
+                    utterances: List[str] = record["data"]
+                    if len(utterances) % 2 == 1:
+                        utterances.pop()
+                    if len(utterances) < 2:
+                        continue
+
+                    conversations = []
+                    for position, text in enumerate(utterances):
+                        speaker = "human" if position % 2 == 0 else "gpt"
+                        conversations.append({"from": speaker, "value": text})
+                    yield example_id, {"conversations": conversations}
--- a/data/wiki_demo.txt
+++ b/data/wiki_demo.txt
--- a/evaluation/ceval/ceval.py
+++ b/evaluation/ceval/ceval.py
@ -0,0 +1,166 @@
+# Copyright 2020 The HuggingFace Datasets Authors and the current dataset script contributor.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import os
+
+import datasets
+import pandas as pd
+
+
+# BibTeX entry reported through DatasetInfo(citation=...).
+_CITATION = """\
+@article{huang2023ceval,
+  title={C-Eval: A Multi-Level Multi-Discipline Chinese Evaluation Suite for Foundation Models}, 
+  author={Huang, Yuzhen and Bai, Yuzhuo and Zhu, Zhihao and Zhang, Junlei and Zhang, Jinghan and Su, Tangjun and Liu, Junteng and Lv, Chuancheng and Zhang, Yikai and Lei, Jiayi and Fu, Yao and Sun, Maosong and He, Junxian},
+  journal={arXiv preprint arXiv:2305.08322},
+  year={2023}
+}
+"""
+
+# Human-readable summary reported through DatasetInfo(description=...).
+_DESCRIPTION = """\
+C-Eval is a comprehensive Chinese evaluation suite for foundation models. It consists of 13948 multi-choice questions spanning 52 diverse disciplines and four difficulty levels.
+"""
+
+_HOMEPAGE = "https://cevalbenchmark.com"
+
+_LICENSE = "Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License"
+
+# Archive handed to dl_manager.download_and_extract() in Ceval._split_generators.
+# NOTE(review): a bare file name, so presumably resolved relative to this script - confirm.
+_URL = "ceval.zip"
+
+# One builder config is created per entry; each name is also the stem of the per-split CSV
+# files ("<name>_test.csv", "<name>_val.csv", "<name>_dev.csv"). The group comments below
+# follow the "category" values in evaluation/ceval/mapping.json.
+task_list = [
+    # STEM
+    "computer_network",
+    "operating_system",
+    "computer_architecture",
+    "college_programming",
+    "college_physics",
+    "college_chemistry",
+    "advanced_mathematics",
+    "probability_and_statistics",
+    "discrete_mathematics",
+    "electrical_engineer",
+    "metrology_engineer",
+    "high_school_mathematics",
+    "high_school_physics",
+    "high_school_chemistry",
+    "high_school_biology",
+    "middle_school_mathematics",
+    "middle_school_biology",
+    "middle_school_physics",
+    "middle_school_chemistry",
+    "veterinary_medicine",
+    # Social Sciences
+    "college_economics",
+    "business_administration",
+    "marxism",
+    "mao_zedong_thought",
+    "education_science",
+    "teacher_qualification",
+    "high_school_politics",
+    "high_school_geography",
+    "middle_school_politics",
+    "middle_school_geography",
+    # Humanities
+    "modern_chinese_history",
+    "ideological_and_moral_cultivation",
+    "logic",
+    "law",
+    "chinese_language_and_literature",
+    "art_studies",
+    "professional_tour_guide",
+    "legal_professional",
+    "high_school_chinese",
+    "high_school_history",
+    "middle_school_history",
+    # Other
+    "civil_servant",
+    "sports_science",
+    "plant_protection",
+    "basic_medicine",
+    "clinical_medicine",
+    "urban_and_rural_planner",
+    "accountant",
+    "fire_engineer",
+    "environmental_impact_assessment_engineer",
+    "tax_accountant",
+    "physician",
+]
+
+
+class CevalConfig(datasets.BuilderConfig):
+    """Builder config for one C-Eval subject; the version is always set to 1.0.0."""
+
+    def __init__(self, **kwargs):
+        config_version = datasets.Version("1.0.0")
+        super().__init__(version=config_version, **kwargs)
+
+
+class Ceval(datasets.GeneratorBasedBuilder):
+    """Dataset builder for C-Eval with one config per subject in ``task_list``."""
+
+    BUILDER_CONFIGS = [CevalConfig(name=task_name) for task_name in task_list]
+
+    def _info(self):
+        """Describe the dataset: an int32 ``id`` plus string question, options, answer and explanation."""
+        text_columns = ("question", "A", "B", "C", "D", "answer", "explanation")
+        schema = {"id": datasets.Value("int32")}
+        for column in text_columns:
+            schema[column] = datasets.Value("string")
+        return datasets.DatasetInfo(
+            description=_DESCRIPTION,
+            features=datasets.Features(schema),
+            homepage=_HOMEPAGE,
+            license=_LICENSE,
+            citation=_CITATION,
+        )
+
+    def _split_generators(self, dl_manager):
+        """Extract the archive and map each split to ``<folder>/<task>_<folder>.csv``.
+
+        The test and validation splits read the ``test`` and ``val`` folders; the train split
+        reads the ``dev`` folder.
+        """
+        data_dir = dl_manager.download_and_extract(_URL)
+        task_name = self.config.name
+        split_folders = (
+            (datasets.Split.TEST, "test"),
+            (datasets.Split.VALIDATION, "val"),
+            (datasets.Split.TRAIN, "dev"),
+        )
+        return [
+            datasets.SplitGenerator(
+                name=split,
+                gen_kwargs={"filepath": os.path.join(data_dir, folder, f"{task_name}_{folder}.csv")},
+            )
+            for split, folder in split_folders
+        ]
+
+    def _generate_examples(self, filepath):
+        """Yield ``(row_number, row_dict)`` for each CSV row.
+
+        ``answer`` and ``explanation`` are set to an empty string when the CSV has no such column.
+        """
+        records = pd.read_csv(filepath, encoding="utf-8").to_dict(orient="records")
+        for row_number, record in enumerate(records):
+            record.setdefault("answer", "")
+            record.setdefault("explanation", "")
+            yield row_number, record
--- a/evaluation/ceval/ceval.zip
+++ b/evaluation/ceval/ceval.zip
--- a/evaluation/ceval/mapping.json
+++ b/evaluation/ceval/mapping.json
@ -0,0 +1,210 @@
+{
+  "accountant": {
+    "name": "注册会计师",
+    "category": "Other"
+  },
+  "advanced_mathematics": {
+    "name": "高等数学",
+    "category": "STEM"
+  },
+  "art_studies": {
+    "name": "艺术学",
+    "category": "Humanities"
+  },
+  "basic_medicine": {
+    "name": "基础医学",
+    "category": "Other"
+  },
+  "business_administration": {
+    "name": "工商管理",
+    "category": "Social Sciences"
+  },
+  "chinese_language_and_literature": {
+    "name": "中国语言文学",
+    "category": "Humanities"
+  },
+  "civil_servant": {
+    "name": "公务员",
+    "category": "Other"
+  },
+  "clinical_medicine": {
+    "name": "临床医学",
+    "category": "Other"
+  },
+  "college_chemistry": {
+    "name": "大学化学",
+    "category": "STEM"
+  },
+  "college_economics": {
+    "name": "大学经济学",
+    "category": "Social Sciences"
+  },
+  "college_physics": {
+    "name": "大学物理",
+    "category": "STEM"
+  },
+  "college_programming": {
+    "name": "大学编程",
+    "category": "STEM"
+  },
+  "computer_architecture": {
+    "name": "计算机组成",
+    "category": "STEM"
+  },
+  "computer_network": {
+    "name": "计算机网络",
+    "category": "STEM"
+  },
+  "discrete_mathematics": {
+    "name": "离散数学",
+    "category": "STEM"
+  },
+  "education_science": {
+    "name": "教育学",
+    "category": "Social Sciences"
+  },
+  "electrical_engineer": {
+    "name": "注册电气工程师",
+    "category": "STEM"
+  },
+  "environmental_impact_assessment_engineer": {
+    "name": "环境影响评价工程师",
+    "category": "Other"
+  },
+  "fire_engineer": {
+    "name": "注册消防工程师",
+    "category": "Other"
+  },
+  "high_school_biology": {
+    "name": "高中生物",
+    "category": "STEM"
+  },
+  "high_school_chemistry": {
+    "name": "高中化学",
+    "category": "STEM"
+  },
+  "high_school_chinese": {
+    "name": "高中语文",
+    "category": "Humanities"
+  },
+  "high_school_geography": {
+    "name": "高中地理",
+    "category": "Social Sciences"
+  },
+  "high_school_history": {
+    "name": "高中历史",
+    "category": "Humanities"
+  },
+  "high_school_mathematics": {
+    "name": "高中数学",
+    "category": "STEM"
+  },
+  "high_school_physics": {
+    "name": "高中物理",
+    "category": "STEM"
+  },
+  "high_school_politics": {
+    "name": "高中政治",
+    "category": "Social Sciences"
+  },
+  "ideological_and_moral_cultivation": {
+    "name": "思想道德修养与法律基础",
+    "category": "Humanities"
+  },
+  "law": {
+    "name": "法学",
+    "category": "Humanities"
+  },
+  "legal_professional": {
+    "name": "法律职业资格",
+    "category": "Humanities"
+  },
+  "logic": {
+    "name": "逻辑学",
+    "category": "Humanities"
+  },
+  "mao_zedong_thought": {
+    "name": "毛泽东思想和中国特色社会主义理论体系概论",
+    "category": "Social Sciences"
+  },
+  "marxism": {
+    "name": "马克思主义基本原理",
+    "category": "Social Sciences"
+  },
+  "metrology_engineer": {
+    "name": "注册计量师",
+    "category": "STEM"
+  },
+  "middle_school_biology": {
+    "name": "初中生物",
+    "category": "STEM"
+  },
+  "middle_school_chemistry": {
+    "name": "初中化学",
+    "category": "STEM"
+  },
+  "middle_school_geography": {
+    "name": "初中地理",
+    "category": "Social Sciences"
+  },
+  "middle_school_history": {
+    "name": "初中历史",
+    "category": "Humanities"
+  },
+  "middle_school_mathematics": {
+    "name": "初中数学",
+    "category": "STEM"
+  },
+  "middle_school_physics": {
+    "name": "初中物理",
+    "category": "STEM"
+  },
+  "middle_school_politics": {
+    "name": "初中政治",
+    "category": "Social Sciences"
+  },
+  "modern_chinese_history": {
+    "name": "近代史纲要",
+    "category": "Humanities"
+  },
+  "operating_system": {
+    "name": "操作系统",
+    "category": "STEM"
+  },
+  "physician": {
+    "name": "医师资格",
+    "category": "Other"
+  },
+  "plant_protection": {
+    "name": "植物保护",
+    "category": "Other"
+  },
+  "probability_and_statistics": {
+    "name": "概率统计",
+    "category": "STEM"
+  },
+  "professional_tour_guide": {
+    "name": "导游资格",
+    "category": "Humanities"
+  },
+  "sports_science": {
+    "name": "体育学",
+    "category": "Other"
+  },
+  "tax_accountant": {
+    "name": "税务师",
+    "category": "Other"
+  },
+  "teacher_qualification": {
+    "name": "教师资格",
+    "category": "Social Sciences"
+  },
+  "urban_and_rural_planner": {
+    "name": "注册城乡规划师",
+    "category": "Other"
+  },
+  "veterinary_medicine": {
+    "name": "兽医学",
+    "category": "STEM"
+  }
+}
--- a/evaluation/cmmlu/cmmlu.py
+++ b/evaluation/cmmlu/cmmlu.py
@ -0,0 +1,167 @@
+# Copyright 2020 The HuggingFace Datasets Authors and the current dataset script contributor.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import os
+
+import datasets
+import pandas as pd
+
+
+# BibTeX entry reported through DatasetInfo(citation=...).
+_CITATION = """\
+@article{li2023cmmlu,
+  title={CMMLU: Measuring massive multitask language understanding in Chinese},
+  author={Haonan Li and Yixuan Zhang and Fajri Koto and Yifei Yang and Hai Zhao and Yeyun Gong and Nan Duan and Timothy Baldwin},
+  journal={arXiv preprint arXiv:2306.09212},
+  year={2023}
+}
+"""
+
+# Human-readable summary reported through DatasetInfo(description=...).
+_DESCRIPTION = """\
+CMMLU is a comprehensive Chinese assessment suite specifically designed to evaluate the advanced knowledge and reasoning abilities of LLMs within the Chinese language and cultural context.
+"""
+
+_HOMEPAGE = "https://github.com/haonan-li/CMMLU"
+
+_LICENSE = "Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License"
+
+# Archive handed to dl_manager.download_and_extract() in CMMLU._split_generators.
+# NOTE(review): a bare file name, so presumably resolved relative to this script - confirm.
+_URL = "cmmlu.zip"
+
+# One builder config is created per entry; each name is also the stem of the per-split CSV
+# files ("test/<name>.csv" and "dev/<name>.csv").
+task_list = [
+    "agronomy",
+    "anatomy",
+    "ancient_chinese",
+    "arts",
+    "astronomy",
+    "business_ethics",
+    "chinese_civil_service_exam",
+    "chinese_driving_rule",
+    "chinese_food_culture",
+    "chinese_foreign_policy",
+    "chinese_history",
+    "chinese_literature",
+    "chinese_teacher_qualification",
+    "clinical_knowledge",
+    "college_actuarial_science",
+    "college_education",
+    "college_engineering_hydrology",
+    "college_law",
+    "college_mathematics",
+    "college_medical_statistics",
+    "college_medicine",
+    "computer_science",
+    "computer_security",
+    "conceptual_physics",
+    "construction_project_management",
+    "economics",
+    "education",
+    "electrical_engineering",
+    "elementary_chinese",
+    "elementary_commonsense",
+    "elementary_information_and_technology",
+    "elementary_mathematics",
+    "ethnology",
+    "food_science",
+    "genetics",
+    "global_facts",
+    "high_school_biology",
+    "high_school_chemistry",
+    "high_school_geography",
+    "high_school_mathematics",
+    "high_school_physics",
+    "high_school_politics",
+    "human_sexuality",
+    "international_law",
+    "journalism",
+    "jurisprudence",
+    "legal_and_moral_basis",
+    "logical",
+    "machine_learning",
+    "management",
+    "marketing",
+    "marxist_theory",
+    "modern_chinese",
+    "nutrition",
+    "philosophy",
+    "professional_accounting",
+    "professional_law",
+    "professional_medicine",
+    "professional_psychology",
+    "public_relations",
+    "security_study",
+    "sociology",
+    "sports_science",
+    "traditional_chinese_medicine",
+    "virology",
+    "world_history",
+    "world_religions",
+]
+
+
+class CMMLUConfig(datasets.BuilderConfig):
+    """Builder config for one CMMLU subject; the version is always set to 1.0.1."""
+
+    def __init__(self, **kwargs):
+        config_version = datasets.Version("1.0.1")
+        super().__init__(version=config_version, **kwargs)
+
+
+class CMMLU(datasets.GeneratorBasedBuilder):
+    """Dataset builder for CMMLU with one config per subject in ``task_list``."""
+
+    BUILDER_CONFIGS = [CMMLUConfig(name=task_name) for task_name in task_list]
+
+    def _info(self):
+        """Describe the dataset: string columns for the question, the four options and the answer."""
+        text_columns = ("question", "A", "B", "C", "D", "answer")
+        schema = {column: datasets.Value("string") for column in text_columns}
+        return datasets.DatasetInfo(
+            description=_DESCRIPTION,
+            features=datasets.Features(schema),
+            homepage=_HOMEPAGE,
+            license=_LICENSE,
+            citation=_CITATION,
+        )
+
+    def _split_generators(self, dl_manager):
+        """Extract the archive and map each split to ``<folder>/<task>.csv``.
+
+        The test split reads the ``test`` folder; the train split reads the ``dev`` folder.
+        """
+        data_dir = dl_manager.download_and_extract(_URL)
+        task_name = self.config.name
+        split_folders = ((datasets.Split.TEST, "test"), (datasets.Split.TRAIN, "dev"))
+        return [
+            datasets.SplitGenerator(
+                name=split,
+                gen_kwargs={"filepath": os.path.join(data_dir, f"{folder}/{task_name}.csv")},
+            )
+            for split, folder in split_folders
+        ]
+
+    def _generate_examples(self, filepath):
+        """Yield ``(row_number, row_dict)`` for each CSV row.
+
+        The first CSV column is consumed as the index. ``Question`` and ``Answer`` are re-keyed
+        to ``question`` and ``answer``, with an empty string used when a column is absent.
+        """
+        table = pd.read_csv(filepath, header=0, index_col=0, encoding="utf-8")
+        for row_number, record in enumerate(table.to_dict(orient="records")):
+            record["question"] = record.pop("Question", "")
+            record["answer"] = record.pop("Answer", "")
+            yield row_number, record
--- a/evaluation/cmmlu/cmmlu.zip
+++ b/evaluation/cmmlu/cmmlu.zip
--- a/evaluation/cmmlu/mapping.json
+++ b/evaluation/cmmlu/mapping.json
@ -0,0 +1,270 @@
+{
+  "agronomy": {
+    "name": "农学",
+    "category": "Other"
+  },
+  "anatomy": {
+    "name": "解剖学",
+    "category": "STEM"
+  },
+  "ancient_chinese": {
+    "name": "古汉语",
+    "category": "Social Sciences"
+  },
+  "arts": {
+    "name": "艺术学",
+    "category": "Humanities"
+  },
+  "astronomy": {
+    "name": "天文学",
+    "category": "STEM"
+  },
+  "business_ethics": {
+    "name": "商业伦理",
+    "category": "Social Sciences"
+  },
+  "chinese_civil_service_exam": {
+    "name": "中国公务员考试",
+    "category": "Social Sciences"
+  },
+  "chinese_driving_rule": {
+    "name": "中国驾驶规则",
+    "category": "Other"
+  },
+  "chinese_food_culture": {
+    "name": "中国饮食文化",
+    "category": "Social Sciences"
+  },
+  "chinese_foreign_policy": {
+    "name": "中国外交政策",
+    "category": "Social Sciences"
+  },
+  "chinese_history": {
+    "name": "中国历史",
+    "category": "Humanities"
+  },
+  "chinese_literature": {
+    "name": "中国文学",
+    "category": "Humanities"
+  },
+  "chinese_teacher_qualification": {
+    "name": "中国教师资格",
+    "category": "Social Sciences"
+  },
+  "college_actuarial_science": {
+    "name": "大学精算学",
+    "category": "STEM"
+  },
+  "college_education": {
+    "name": "大学教育学",
+    "category": "Social Sciences"
+  },
+  "college_engineering_hydrology": {
+    "name": "大学工程水文学",
+    "category": "STEM"
+  },
+  "college_law": {
+    "name": "大学法律",
+    "category": "Humanities"
+  },
+  "college_mathematics": {
+    "name": "大学数学",
+    "category": "STEM"
+  },
+  "college_medical_statistics": {
+    "name": "大学医学统计",
+    "category": "STEM"
+  },
+  "clinical_knowledge": {
+    "name": "临床知识",
+    "category": "Other"
+  },
+  "college_medicine": {
+    "name": "大学医学",
+    "category": "Other"
+  },
+  "computer_science": {
+    "name": "计算机科学",
+    "category": "STEM"
+  },
+  "computer_security": {
+    "name": "计算机安全",
+    "category": "Other"
+  },
+  "conceptual_physics": {
+    "name": "概念物理学",
+    "category": "STEM"
+  },
+  "construction_project_management": {
+    "name": "建设工程管理",
+    "category": "Other"
+  },
+  "economics": {
+    "name": "经济学",
+    "category": "Social Sciences"
+  },
+  "education": {
+    "name": "教育学",
+    "category": "Social Sciences"
+  },
+  "elementary_chinese": {
+    "name": "小学语文",
+    "category": "Social Sciences"
+  },
+  "elementary_commonsense": {
+    "name": "小学常识",
+    "category": "Other"
+  },
+  "elementary_information_and_technology": {
+    "name": "小学信息技术",
+    "category": "Other"
+  },
+  "electrical_engineering": {
+    "name": "电气工程",
+    "category": "STEM"
+  },
+  "elementary_mathematics": {
+    "name": "初等数学",
+    "category": "STEM"
+  },
+  "ethnology": {
+    "name": "民族学",
+    "category": "Social Sciences"
+  },
+  "food_science": {
+    "name": "食品科学",
+    "category": "Other"
+  },
+  "genetics": {
+    "name": "遗传学",
+    "category": "STEM"
+  },
+  "global_facts": {
+    "name": "全球事实",
+    "category": "Humanities"
+  },
+  "high_school_biology": {
+    "name": "高中生物",
+    "category": "STEM"
+  },
+  "high_school_chemistry": {
+    "name": "高中化学",
+    "category": "STEM"
+  },
+  "high_school_geography": {
+    "name": "高中地理",
+    "category": "Social Sciences"
+  },
+  "high_school_mathematics": {
+    "name": "高中数学",
+    "category": "STEM"
+  },
+  "high_school_physics": {
+    "name": "高中物理学",
+    "category": "STEM"
+  },
+  "high_school_politics": {
+    "name": "高中政治",
+    "category": "Social Sciences"
+  },
+  "human_sexuality": {
+    "name": "人类性行为",
+    "category": "Other"
+  },
+  "international_law": {
+    "name": "国际法学",
+    "category": "Humanities"
+  },
+  "journalism": {
+    "name": "新闻学",
+    "category": "Social Sciences"
+  },
+  "jurisprudence": {
+    "name": "法理学",
+    "category": "Humanities"
+  },
+  "legal_and_moral_basis": {
+    "name": "法律与道德基础",
+    "category": "Other"
+  },
+  "logical": {
+    "name": "逻辑学",
+    "category": "Humanities"
+  },
+  "machine_learning": {
+    "name": "机器学习",
+    "category": "STEM"
+  },
+  "management": {
+    "name": "管理学",
+    "category": "Social Sciences"
+  },
+  "marketing": {
+    "name": "市场营销",
+    "category": "Social Sciences"
+  },
+  "marxist_theory": {
+    "name": "马克思主义理论",
+    "category": "Humanities"
+  },
+  "modern_chinese": {
+    "name": "现代汉语",
+    "category": "Social Sciences"
+  },
+  "nutrition": {
+    "name": "营养学",
+    "category": "Other"
+  },
+  "philosophy": {
+    "name": "哲学",
+    "category": "Humanities"
+  },
+  "professional_accounting": {
+    "name": "专业会计",
+    "category": "Social Sciences"
+  },
+  "professional_law": {
+    "name": "专业法学",
+    "category": "Humanities"
+  },
+  "professional_medicine": {
+    "name": "专业医学",
+    "category": "Other"
+  },
+  "professional_psychology": {
+    "name": "专业心理学",
+    "category": "Social Sciences"
+  },
+  "public_relations": {
+    "name": "公共关系",
+    "category": "Social Sciences"
+  },
+  "security_study": {
+    "name": "安全研究",
+    "category": "Social Sciences"
+  },
+  "sociology": {
+    "name": "社会学",
+    "category": "Social Sciences"
+  },
+  "sports_science": {
+    "name": "体育学",
+    "category": "Other"
+  },
+  "traditional_chinese_medicine": {
+    "name": "中医中药",
+    "category": "Other"
+  },
+  "virology": {
+    "name": "病毒学",
+    "category": "STEM"
+  },
+  "world_history": {
+    "name": "世界历史",
+    "category": "Humanities"
+  },
+  "world_religions": {
+    "name": "世界宗教",
+    "category": "Humanities"
+  }
+}
--- a/evaluation/mmlu/mapping.json
+++ b/evaluation/mmlu/mapping.json
@ -0,0 +1,230 @@
+{
+  "abstract_algebra": {
+    "name": "abstract algebra",
+    "category": "STEM"
+  },
+  "anatomy": {
+    "name": "anatomy",
+    "category": "Other"
+  },
+  "astronomy": {
+    "name": "astronomy",
+    "category": "STEM"
+  },
+  "business_ethics": {
+    "name": "business ethics",
+    "category": "Other"
+  },
+  "clinical_knowledge": {
+    "name": "clinical knowledge",
+    "category": "Other"
+  },
+  "college_biology": {
+    "name": "college biology",
+    "category": "STEM"
+  },
+  "college_chemistry": {
+    "name": "college chemistry",
+    "category": "STEM"
+  },
+  "college_computer_science": {
+    "name": "college computer science",
+    "category": "STEM"
+  },
+  "college_mathematics": {
+    "name": "college mathematics",
+    "category": "STEM"
+  },
+  "college_medicine": {
+    "name": "college medicine",
+    "category": "Other"
+  },
+  "college_physics": {
+    "name": "college physics",
+    "category": "STEM"
+  },
+  "computer_security": {
+    "name": "computer security",
+    "category": "STEM"
+  },
+  "conceptual_physics": {
+    "name": "conceptual physics",
+    "category": "STEM"
+  },
+  "econometrics": {
+    "name": "econometrics",
+    "category": "Social Sciences"
+  },
+  "electrical_engineering": {
+    "name": "electrical engineering",
+    "category": "STEM"
+  },
+  "elementary_mathematics": {
+    "name": "elementary mathematics",
+    "category": "STEM"
+  },
+  "formal_logic": {
+    "name": "formal logic",
+    "category": "Humanities"
+  },
+  "global_facts": {
+    "name": "global facts",
+    "category": "Other"
+  },
+  "high_school_biology": {
+    "name": "high school biology",
+    "category": "STEM"
+  },
+  "high_school_chemistry": {
+    "name": "high school chemistry",
+    "category": "STEM"
+  },
+  "high_school_computer_science": {
+    "name": "high school computer science",
+    "category": "STEM"
+  },
+  "high_school_european_history": {
+    "name": "high school european history",
+    "category": "Humanities"
+  },
+  "high_school_geography": {
+    "name": "high school geography",
+    "category": "Social Sciences"
+  },
+  "high_school_government_and_politics": {
+    "name": "high school government and politics",
+    "category": "Social Sciences"
+  },
+  "high_school_macroeconomics": {
+    "name": "high school macroeconomics",
+    "category": "Social Sciences"
+  },
+  "high_school_mathematics": {
+    "name": "high school mathematics",
+    "category": "STEM"
+  },
+  "high_school_microeconomics": {
+    "name": "high school microeconomics",
+    "category": "Social Sciences"
+  },
+  "high_school_physics": {
+    "name": "high school physics",
+    "category": "STEM"
+  },
+  "high_school_psychology": {
+    "name": "high school psychology",
+    "category": "Social Sciences"
+  },
+  "high_school_statistics": {
+    "name": "high school statistics",
+    "category": "STEM"
+  },
+  "high_school_us_history": {
+    "name": "high school us history",
+    "category": "Humanities"
+  },
+  "high_school_world_history": {
+    "name": "high school world history",
+    "category": "Humanities"
+  },
+  "human_aging": {
+    "name": "human aging",
+    "category": "Other"
+  },
+  "human_sexuality": {
+    "name": "human sexuality",
+    "category": "Social Sciences"
+  },
+  "international_law": {
+    "name": "international law",
+    "category": "Humanities"
+  },
+  "jurisprudence": {
+    "name": "jurisprudence",
+    "category": "Humanities"
+  },
+  "logical_fallacies": {
+    "name": "logical fallacies",
+    "category": "Humanities"
+  },
+  "machine_learning": {
+    "name": "machine learning",
+    "category": "STEM"
+  },
+  "management": {
+    "name": "management",
+    "category": "Other"
+  },
+  "marketing": {
+    "name": "marketing",
+    "category": "Other"
+  },
+  "medical_genetics": {
+    "name": "medical genetics",
+    "category": "Other"
+  },
+  "miscellaneous": {
+    "name": "miscellaneous",
+    "category": "Other"
+  },
+  "moral_disputes": {
+    "name": "moral disputes",
+    "category": "Humanities"
+  },
+  "moral_scenarios": {
+    "name": "moral scenarios",
+    "category": "Humanities"
+  },
+  "nutrition": {
+    "name": "nutrition",
+    "category": "Other"
+  },
+  "philosophy": {
+    "name": "philosophy",
+    "category": "Humanities"
+  },
+  "prehistory": {
+    "name": "prehistory",
+    "category": "Humanities"
+  },
+  "professional_accounting": {
+    "name": "professional accounting",
+    "category": "Other"
+  },
+  "professional_law": {
+    "name": "professional law",
+    "category": "Humanities"
+  },
+  "professional_medicine": {
+    "name": "professional medicine",
+    "category": "Other"
+  },
+  "professional_psychology": {
+    "name": "professional psychology",
+    "category": "Social Sciences"
+  },
+  "public_relations": {
+    "name": "public relations",
+    "category": "Social Sciences"
+  },
+  "security_studies": {
+    "name": "security studies",
+    "category": "Social Sciences"
+  },
+  "sociology": {
+    "name": "sociology",
+    "category": "Social Sciences"
+  },
+  "us_foreign_policy": {
+    "name": "us foreign policy",
+    "category": "Social Sciences"
+  },
+  "virology": {
+    "name": "virology",
+    "category": "Other"
+  },
+  "world_religions": {
+    "name": "world religions",
+    "category": "Humanities"
+  }
+}
--- a/evaluation/mmlu/mmlu.py
+++ b/evaluation/mmlu/mmlu.py
@ -0,0 +1,167 @@
+# Copyright 2020 The HuggingFace Datasets Authors and the current dataset script contributor.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import os
+
+import datasets
+import pandas as pd
+
+
+# Metadata strings passed to datasets.DatasetInfo in MMLU._info().
+_CITATION = """\
+@article{hendryckstest2021,
+  title={Measuring Massive Multitask Language Understanding},
+  author={Dan Hendrycks and Collin Burns and Steven Basart and Andy Zou and Mantas Mazeika and Dawn Song and Jacob Steinhardt},
+  journal={Proceedings of the International Conference on Learning Representations (ICLR)},
+  year={2021}
+}
+"""
+
+_DESCRIPTION = """\
+Measuring Massive Multitask Language Understanding by Dan Hendrycks, Collin Burns, Steven Basart, Andy Zou, Mantas Mazeika, Dawn Song, and Jacob Steinhardt (ICLR 2021).
+"""
+
+_HOMEPAGE = "https://github.com/hendrycks/test"
+
+_LICENSE = "MIT"
+
+# Archive handed to dl_manager.download_and_extract() in MMLU._split_generators().
+# NOTE(review): presumably resolved relative to this script's directory - confirm.
+_URL = "mmlu.zip"
+
+# Task identifiers. One MMLUConfig is built per entry, and the config name
+# selects the "<task>_<split>.csv" files that are read.
+task_list = [
+    "high_school_european_history",
+    "business_ethics",
+    "clinical_knowledge",
+    "medical_genetics",
+    "high_school_us_history",
+    "high_school_physics",
+    "high_school_world_history",
+    "virology",
+    "high_school_microeconomics",
+    "econometrics",
+    "college_computer_science",
+    "high_school_biology",
+    "abstract_algebra",
+    "professional_accounting",
+    "philosophy",
+    "professional_medicine",
+    "nutrition",
+    "global_facts",
+    "machine_learning",
+    "security_studies",
+    "public_relations",
+    "professional_psychology",
+    "prehistory",
+    "anatomy",
+    "human_sexuality",
+    "college_medicine",
+    "high_school_government_and_politics",
+    "college_chemistry",
+    "logical_fallacies",
+    "high_school_geography",
+    "elementary_mathematics",
+    "human_aging",
+    "college_mathematics",
+    "high_school_psychology",
+    "formal_logic",
+    "high_school_statistics",
+    "international_law",
+    "high_school_mathematics",
+    "high_school_computer_science",
+    "conceptual_physics",
+    "miscellaneous",
+    "high_school_chemistry",
+    "marketing",
+    "professional_law",
+    "management",
+    "college_physics",
+    "jurisprudence",
+    "world_religions",
+    "sociology",
+    "us_foreign_policy",
+    "high_school_macroeconomics",
+    "computer_security",
+    "moral_scenarios",
+    "moral_disputes",
+    "electrical_engineering",
+    "astronomy",
+    "college_biology",
+]
+
+
+class MMLUConfig(datasets.BuilderConfig):
+    def __init__(self, **kwargs):
+        super().__init__(version=datasets.Version("1.0.0"), **kwargs)
+
+
+class MMLU(datasets.GeneratorBasedBuilder):
+    BUILDER_CONFIGS = [
+        MMLUConfig(
+            name=task_name,
+        )
+        for task_name in task_list
+    ]
+
+    def _info(self):
+        features = datasets.Features(
+            {
+                "question": datasets.Value("string"),
+                "A": datasets.Value("string"),
+                "B": datasets.Value("string"),
+                "C": datasets.Value("string"),
+                "D": datasets.Value("string"),
+                "answer": datasets.Value("string"),
+            }
+        )
+        return datasets.DatasetInfo(
+            description=_DESCRIPTION,
+            features=features,
+            homepage=_HOMEPAGE,
+            license=_LICENSE,
+            citation=_CITATION,
+        )
+
+    def _split_generators(self, dl_manager):
+        data_dir = dl_manager.download_and_extract(_URL)
+        task_name = self.config.name
+        return [
+            datasets.SplitGenerator(
+                name=datasets.Split.TEST,
+                gen_kwargs={
+                    "filepath": os.path.join(
+                        data_dir, "data", "test", f"{task_name}_test.csv"
+                    ),
+                },
+            ),
+            datasets.SplitGenerator(
+                name=datasets.Split.VALIDATION,
+                gen_kwargs={
+                    "filepath": os.path.join(
+                        data_dir, "data", "val", f"{task_name}_val.csv"
+                    ),
+                },
+            ),
+            datasets.SplitGenerator(
+                name=datasets.Split.TRAIN,
+                gen_kwargs={
+                    "filepath": os.path.join(
+                        data_dir, "data", "dev", f"{task_name}_dev.csv"
+                    ),
+                },
+            ),
+        ]
+
+    def _generate_examples(self, filepath):
+        df = pd.read_csv(filepath)
+        df.columns = ["question", "A", "B", "C", "D", "answer"]
+
+        for i, instance in enumerate(df.to_dict(orient="records")):
+            yield i, instance
--- a/evaluation/mmlu/mmlu.zip
+++ b/evaluation/mmlu/mmlu.zip
--- a/examples/README.md
+++ b/examples/README.md
@ -0,0 +1,50 @@
+We provide a variety of example scripts for fine-tuning LLMs.
+
+```
+examples/
+├── lora_single_gpu/
+│   ├── pretrain.sh: Do continuous pre-training using LoRA
+│   ├── sft.sh: Do supervised fine-tuning using LoRA
+│   ├── reward.sh: Do reward modeling using LoRA
+│   ├── ppo.sh: Do PPO training using LoRA
+│   ├── dpo.sh: Do DPO training using LoRA
+│   ├── orpo.sh: Do ORPO training using LoRA
+│   ├── sft_mllm.sh: Do supervised fine-tuning on multimodal data using LoRA
+│   ├── prepare.sh: Save tokenized dataset
+│   └── predict.sh: Run batch prediction and compute BLEU and ROUGE scores after LoRA tuning
+├── qlora_single_gpu/
+│   ├── bitsandbytes.sh: Fine-tune 4/8-bit BNB models using QLoRA
+│   ├── gptq.sh: Fine-tune 4/8-bit GPTQ models using QLoRA
+│   ├── awq.sh: Fine-tune 4-bit AWQ models using QLoRA
+│   └── aqlm.sh: Fine-tune 2-bit AQLM models using QLoRA
+├── lora_multi_gpu/
+│   ├── single_node.sh: Fine-tune model with Accelerate on single node using LoRA
+│   ├── multi_node.sh: Fine-tune model with Accelerate on multiple nodes using LoRA
+│   └── ds_zero3.sh: Fine-tune model with DeepSpeed ZeRO-3 using LoRA (weight sharding)
+├── full_multi_gpu/
+│   ├── single_node.sh: Full fine-tune model with DeepSpeed on single node
+│   ├── multi_node.sh: Full fine-tune model with DeepSpeed on multiple nodes
+│   └── predict.sh: Run parallel batch prediction and compute BLEU and ROUGE scores after full tuning
+├── merge_lora/
+│   ├── merge.sh: Merge LoRA weights into the pre-trained models
+│   └── quantize.sh: Quantize the fine-tuned model with AutoGPTQ
+├── inference/
+│   ├── cli_demo.sh: Chat with fine-tuned model in the CLI with LoRA adapters
+│   ├── api_demo.sh: Chat with fine-tuned model in an OpenAI-style API with LoRA adapters
+│   ├── web_demo.sh: Chat with fine-tuned model in the Web browser with LoRA adapters
+│   └── evaluate.sh: Evaluate model on the MMLU/CMMLU/C-Eval benchmarks with LoRA adapters
+└── extras/
+    ├── galore/
+    │   └── sft.sh: Fine-tune model with GaLore
+    ├── badam/
+    │   └── sft.sh: Fine-tune model with BAdam
+    ├── loraplus/
+    │   └── sft.sh: Fine-tune model using LoRA+
+    ├── mod/
+    │   └── sft.sh: Fine-tune model using Mixture-of-Depths
+    ├── llama_pro/
+    │   ├── expand.sh: Expand layers in the model
+    │   └── sft.sh: Fine-tune the expanded model
+    └── fsdp_qlora/
+        └── sft.sh: Fine-tune quantized model with FSDP+QLoRA
+```
--- a/examples/README_zh.md
+++ b/examples/README_zh.md
@ -0,0 +1,50 @@
+我们提供了多样化的大模型微调示例脚本。
+
+```
+examples/
+├── lora_single_gpu/
+│   ├── pretrain.sh: 基于 LoRA 进行增量预训练
+│   ├── sft.sh: 基于 LoRA 进行指令监督微调
+│   ├── reward.sh: 基于 LoRA 进行奖励模型训练
+│   ├── ppo.sh: 基于 LoRA 进行 PPO 训练
+│   ├── dpo.sh: 基于 LoRA 进行 DPO 训练
+│   ├── orpo.sh: 基于 LoRA 进行 ORPO 训练
+│   ├── sft_mllm.sh: 基于 LoRA 进行多模态指令监督微调
+│   ├── prepare.sh: 保存预处理后的数据集
+│   └── predict.sh: 基于 LoRA 进行批量预测并计算 BLEU 和 ROUGE 分数
+├── qlora_single_gpu/
+│   ├── bitsandbytes.sh: 基于 QLoRA 微调 4/8 比特 BNB 模型
+│   ├── gptq.sh: 基于 QLoRA 微调 4/8 比特 GPTQ 模型
+│   ├── awq.sh: 基于 QLoRA 微调 4 比特 AWQ 模型
+│   └── aqlm.sh: 基于 QLoRA 微调 2 比特 AQLM 模型
+├── lora_multi_gpu/
+│   ├── single_node.sh: 使用 Accelerate 进行单节点 LoRA 训练
+│   ├── multi_node.sh: 使用 Accelerate 进行多节点 LoRA 训练
+│   └── ds_zero3.sh: 使用 DeepSpeed ZeRO-3 进行 LoRA 训练（拆分权重）
+├── full_multi_gpu/
+│   ├── single_node.sh: 使用 DeepSpeed 进行单节点全量训练
+│   ├── multi_node.sh: 使用 DeepSpeed 进行多节点全量训练
+│   └── predict.sh: 基于全量训练进行多卡批量预测并计算 BLEU 和 ROUGE 分数
+├── merge_lora/
+│   ├── merge.sh: 将 LoRA 权重合并到预训练模型中
+│   └── quantize.sh: 使用 AutoGPTQ 量化微调后的模型
+├── inference/
+│   ├── cli_demo.sh: 启动 LoRA 模型的命令行推理接口
+│   ├── api_demo.sh: 启动 LoRA 模型的 OpenAI 风格 API
+│   ├── web_demo.sh: 启动 LoRA 模型的浏览器推理接口
+│   └── evaluate.sh: 在 MMLU/CMMLU/C-Eval 数据集上评测 LoRA 模型
+└── extras/
+    ├── galore/
+    │   └── sft.sh: 使用 GaLore 训练模型
+    ├── badam/
+    │   └── sft.sh: 使用 BAdam 训练模型
+    ├── loraplus/
+    │   └── sft.sh: 使用 LoRA+ 训练模型
+    ├── mod/
+    │   └── sft.sh: 使用深度混合训练模型
+    ├── llama_pro/
+    │   ├── expand.sh: 扩展模型中的层
+    │   └── sft.sh: 训练扩展后的模型
+    └── fsdp_qlora/
+        └── sft.sh: 使用 FSDP+QLoRA 微调量化模型
+```
--- a/examples/accelerate/fsdp_config.yaml
+++ b/examples/accelerate/fsdp_config.yaml
@ -0,0 +1,25 @@
+compute_environment: LOCAL_MACHINE
+debug: false
+distributed_type: FSDP
+downcast_bf16: 'no'
+fsdp_config:
+  fsdp_auto_wrap_policy: TRANSFORMER_BASED_WRAP
+  fsdp_backward_prefetch: BACKWARD_PRE
+  fsdp_cpu_ram_efficient_loading: true
+  fsdp_forward_prefetch: false
+  fsdp_offload_params: true
+  fsdp_sharding_strategy: FULL_SHARD
+  fsdp_state_dict_type: FULL_STATE_DICT
+  fsdp_sync_module_states: true
+  fsdp_use_orig_params: false
+machine_rank: 0
+main_training_function: main
+mixed_precision: fp16
+num_machines: 1 # the number of nodes
+num_processes: 2 # the number of GPUs in all nodes
+rdzv_backend: static
+same_network: true
+tpu_env: []
+tpu_use_cluster: false
+tpu_use_sudo: false
+use_cpu: false
--- a/examples/accelerate/master_config.yaml
+++ b/examples/accelerate/master_config.yaml
@ -0,0 +1,18 @@
+compute_environment: LOCAL_MACHINE
+debug: false
+distributed_type: MULTI_GPU
+downcast_bf16: 'no'
+gpu_ids: all
+machine_rank: 0
+main_process_ip: 192.168.0.1
+main_process_port: 29555
+main_training_function: main
+mixed_precision: fp16
+num_machines: 2 # the number of nodes
+num_processes: 8 # the number of GPUs in all nodes
+rdzv_backend: static
+same_network: true
+tpu_env: []
+tpu_use_cluster: false
+tpu_use_sudo: false
+use_cpu: false
--- a/examples/accelerate/single_config.yaml
+++ b/examples/accelerate/single_config.yaml
@ -0,0 +1,16 @@
+compute_environment: LOCAL_MACHINE
+debug: false
+distributed_type: MULTI_GPU
+downcast_bf16: 'no'
+gpu_ids: all
+machine_rank: 0
+main_training_function: main
+mixed_precision: fp16
+num_machines: 1 # the number of nodes
+num_processes: 4 # the number of GPUs in all nodes
+rdzv_backend: static
+same_network: true
+tpu_env: []
+tpu_use_cluster: false
+tpu_use_sudo: false
+use_cpu: false
--- a/examples/accelerate/slave_config.yaml
+++ b/examples/accelerate/slave_config.yaml
@ -0,0 +1,18 @@
+compute_environment: LOCAL_MACHINE
+debug: false
+distributed_type: MULTI_GPU
+downcast_bf16: 'no'
+gpu_ids: all
+machine_rank: 1
+main_process_ip: 192.168.0.1
+main_process_port: 29555
+main_training_function: main
+mixed_precision: fp16
+num_machines: 2 # the number of nodes
+num_processes: 8 # the number of GPUs in all nodes
+rdzv_backend: static
+same_network: true
+tpu_env: []
+tpu_use_cluster: false
+tpu_use_sudo: false
+use_cpu: false
--- a/examples/deepspeed/ds_z2_config.json
+++ b/examples/deepspeed/ds_z2_config.json
@ -0,0 +1,28 @@
+{
+  "train_batch_size": "auto",
+  "train_micro_batch_size_per_gpu": "auto",
+  "gradient_accumulation_steps": "auto",
+  "gradient_clipping": "auto",
+  "zero_allow_untested_optimizer": true,
+  "fp16": {
+    "enabled": "auto",
+    "loss_scale": 0,
+    "loss_scale_window": 1000,
+    "initial_scale_power": 16,
+    "hysteresis": 2,
+    "min_loss_scale": 1
+  },
+  "bf16": {
+    "enabled": "auto"
+  },
+  "zero_optimization": {
+    "stage": 2,
+    "allgather_partitions": true,
+    "allgather_bucket_size": 5e8,
+    "overlap_comm": true,
+    "reduce_scatter": true,
+    "reduce_bucket_size": 5e8,
+    "contiguous_gradients": true,
+    "round_robin_gradients": true
+  }
+}
--- a/examples/deepspeed/ds_z2_offload_config.json
+++ b/examples/deepspeed/ds_z2_offload_config.json
@ -0,0 +1,32 @@
+{
+  "train_batch_size": "auto",
+  "train_micro_batch_size_per_gpu": "auto",
+  "gradient_accumulation_steps": "auto",
+  "gradient_clipping": "auto",
+  "zero_allow_untested_optimizer": true,
+  "fp16": {
+    "enabled": "auto",
+    "loss_scale": 0,
+    "loss_scale_window": 1000,
+    "initial_scale_power": 16,
+    "hysteresis": 2,
+    "min_loss_scale": 1
+  },
+  "bf16": {
+    "enabled": "auto"
+  },
+  "zero_optimization": {
+    "stage": 2,
+    "offload_optimizer": {
+      "device": "cpu",
+      "pin_memory": true
+    },
+    "allgather_partitions": true,
+    "allgather_bucket_size": 5e8,
+    "overlap_comm": true,
+    "reduce_scatter": true,
+    "reduce_bucket_size": 5e8,
+    "contiguous_gradients": true,
+    "round_robin_gradients": true
+  }
+}
--- a/examples/deepspeed/ds_z3_config.json
+++ b/examples/deepspeed/ds_z3_config.json
@ -0,0 +1,30 @@
+{
+  "train_batch_size": "auto",
+  "train_micro_batch_size_per_gpu": "auto",
+  "gradient_accumulation_steps": "auto",
+  "gradient_clipping": "auto",
+  "zero_allow_untested_optimizer": true,
+  "fp16": {
+    "enabled": "auto",
+    "loss_scale": 0,
+    "loss_scale_window": 1000,
+    "initial_scale_power": 16,
+    "hysteresis": 2,
+    "min_loss_scale": 1
+  },
+  "bf16": {
+    "enabled": "auto"
+  },
+  "zero_optimization": {
+    "stage": 3,
+    "overlap_comm": true,
+    "contiguous_gradients": true,
+    "sub_group_size": 1e9,
+    "reduce_bucket_size": "auto",
+    "stage3_prefetch_bucket_size": "auto",
+    "stage3_param_persistence_threshold": "auto",
+    "stage3_max_live_parameters": 1e9,
+    "stage3_max_reuse_distance": 1e9,
+    "stage3_gather_16bit_weights_on_model_save": true
+  }
+}
--- a/examples/deepspeed/ds_z3_offload_config.json
+++ b/examples/deepspeed/ds_z3_offload_config.json
@ -0,0 +1,38 @@
+{
+  "train_batch_size": "auto",
+  "train_micro_batch_size_per_gpu": "auto",
+  "gradient_accumulation_steps": "auto",
+  "gradient_clipping": "auto",
+  "zero_allow_untested_optimizer": true,
+  "fp16": {
+    "enabled": "auto",
+    "loss_scale": 0,
+    "loss_scale_window": 1000,
+    "initial_scale_power": 16,
+    "hysteresis": 2,
+    "min_loss_scale": 1
+  },
+  "bf16": {
+    "enabled": "auto"
+  },
+  "zero_optimization": {
+    "stage": 3,
+    "offload_optimizer": {
+      "device": "cpu",
+      "pin_memory": true
+    },
+    "offload_param": {
+      "device": "cpu",
+      "pin_memory": true
+    },
+    "overlap_comm": true,
+    "contiguous_gradients": true,
+    "sub_group_size": 1e9,
+    "reduce_bucket_size": "auto",
+    "stage3_prefetch_bucket_size": "auto",
+    "stage3_param_persistence_threshold": "auto",
+    "stage3_max_live_parameters": 1e9,
+    "stage3_max_reuse_distance": 1e9,
+    "stage3_gather_16bit_weights_on_model_save": true
+  }
+}
--- a/examples/extras/badam/sft.sh
+++ b/examples/extras/badam/sft.sh
@ -0,0 +1,35 @@
+#!/bin/bash
+
+CUDA_VISIBLE_DEVICES=0 python ../../../src/train_bash.py \
+    --stage sft \
+    --do_train \
+    --model_name_or_path meta-llama/Llama-2-7b-hf \
+    --dataset alpaca_gpt4_en,glaive_toolcall \
+    --dataset_dir ../../../data \
+    --template default \
+    --finetuning_type full \
+    --use_badam \
+    --badam_switch_mode descending \
+    --badam_switch_block_every 50 \
+    --badam_verbose 2 \
+    --output_dir ../../../saves/LLaMA2-7B/badam/sft \
+    --overwrite_cache \
+    --overwrite_output_dir \
+    --cutoff_len 1024 \
+    --preprocessing_num_workers 16 \
+    --per_device_train_batch_size 1 \
+    --per_device_eval_batch_size 1 \
+    --gradient_accumulation_steps 8 \
+    --lr_scheduler_type cosine \
+    --logging_steps 10 \
+    --warmup_steps 20 \
+    --save_steps 100 \
+    --eval_steps 100 \
+    --evaluation_strategy steps \
+    --load_best_model_at_end \
+    --learning_rate 5e-5 \
+    --num_train_epochs 3.0 \
+    --max_samples 3000 \
+    --val_size 0.1 \
+    --plot_loss \
+    --pure_bf16
--- a/examples/extras/fsdp_qlora/sft.sh
+++ b/examples/extras/fsdp_qlora/sft.sh
@ -0,0 +1,41 @@
+#!/bin/bash
+# DO NOT use GPTQ/AWQ model in FSDP+QLoRA
+
+pip install "transformers>=4.39.1"
+pip install "accelerate>=0.28.0"
+pip install "bitsandbytes>=0.43.0"
+
+CUDA_VISIBLE_DEVICES=0,1 accelerate launch \
+    --config_file ../../accelerate/fsdp_config.yaml \
+    ../../../src/train_bash.py \
+    --stage sft \
+    --do_train \
+    --model_name_or_path meta-llama/Llama-2-70b-hf \
+    --dataset alpaca_gpt4_en,glaive_toolcall \
+    --dataset_dir ../../../data \
+    --template default \
+    --finetuning_type lora \
+    --lora_target q_proj,v_proj \
+    --output_dir ../../../saves/LLaMA2-70B/lora/sft \
+    --overwrite_cache \
+    --overwrite_output_dir \
+    --cutoff_len 1024 \
+    --preprocessing_num_workers 16 \
+    --per_device_train_batch_size 1 \
+    --per_device_eval_batch_size 1 \
+    --gradient_accumulation_steps 4 \
+    --lr_scheduler_type cosine \
+    --logging_steps 10 \
+    --warmup_steps 20 \
+    --save_steps 100 \
+    --eval_steps 100 \
+    --evaluation_strategy steps \
+    --load_best_model_at_end \
+    --learning_rate 5e-5 \
+    --num_train_epochs 3.0 \
+    --max_samples 3000 \
+    --val_size 0.1 \
+    --ddp_timeout 180000000 \
+    --quantization_bit 4 \
+    --plot_loss \
+    --fp16
--- a/examples/extras/galore/sft.sh
+++ b/examples/extras/galore/sft.sh
@ -0,0 +1,36 @@
+#!/bin/bash
+
+CUDA_VISIBLE_DEVICES=0 python ../../../src/train_bash.py \
+    --stage sft \
+    --do_train \
+    --model_name_or_path meta-llama/Llama-2-7b-hf \
+    --dataset alpaca_gpt4_en,glaive_toolcall \
+    --dataset_dir ../../../data \
+    --template default \
+    --finetuning_type full \
+    --use_galore \
+    --galore_layerwise \
+    --galore_target mlp,self_attn \
+    --galore_rank 128 \
+    --galore_scale 2.0 \
+    --output_dir ../../../saves/LLaMA2-7B/galore/sft \
+    --overwrite_cache \
+    --overwrite_output_dir \
+    --cutoff_len 1024 \
+    --preprocessing_num_workers 16 \
+    --per_device_train_batch_size 1 \
+    --per_device_eval_batch_size 1 \
+    --gradient_accumulation_steps 1 \
+    --lr_scheduler_type cosine \
+    --logging_steps 10 \
+    --warmup_steps 20 \
+    --save_steps 100 \
+    --eval_steps 100 \
+    --evaluation_strategy steps \
+    --load_best_model_at_end \
+    --learning_rate 5e-5 \
+    --num_train_epochs 3.0 \
+    --max_samples 3000 \
+    --val_size 0.1 \
+    --plot_loss \
+    --pure_bf16
--- a/examples/extras/llama_pro/expand.sh
+++ b/examples/extras/llama_pro/expand.sh
@ -0,0 +1,6 @@
+#!/bin/bash
+
+python ../../../scripts/llama_pro.py \
+    --model_name_or_path meta-llama/Llama-2-7b-hf \
+    --output_dir ../../../models/llama2-7b-pro \
+    --num_expand 8
--- a/examples/extras/llama_pro/sft.sh
+++ b/examples/extras/llama_pro/sft.sh
@ -0,0 +1,34 @@
+#!/bin/bash
+
+CUDA_VISIBLE_DEVICES=0 python ../../../src/train_bash.py \
+    --stage sft \
+    --do_train \
+    --model_name_or_path ../../../models/llama2-7b-pro \
+    --dataset alpaca_gpt4_en,glaive_toolcall \
+    --dataset_dir ../../../data \
+    --template default \
+    --finetuning_type freeze \
+    --name_module_trainable all \
+    --num_layer_trainable 8 \
+    --use_llama_pro \
+    --output_dir ../../../saves/LLaMA2-7B-Pro/lora/sft \
+    --overwrite_cache \
+    --overwrite_output_dir \
+    --cutoff_len 1024 \
+    --preprocessing_num_workers 16 \
+    --per_device_train_batch_size 1 \
+    --per_device_eval_batch_size 1 \
+    --gradient_accumulation_steps 8 \
+    --lr_scheduler_type cosine \
+    --logging_steps 10 \
+    --warmup_steps 20 \
+    --save_steps 100 \
+    --eval_steps 100 \
+    --evaluation_strategy steps \
+    --load_best_model_at_end \
+    --learning_rate 5e-5 \
+    --num_train_epochs 3.0 \
+    --max_samples 3000 \
+    --val_size 0.1 \
+    --plot_loss \
+    --fp16
--- a/examples/extras/loraplus/sft.sh
+++ b/examples/extras/loraplus/sft.sh
@ -0,0 +1,33 @@
+#!/bin/bash
+# LoRA SFT of meta-llama/Llama-2-7b-hf with --loraplus_lr_ratio 16.0 on GPU 0.
+# This script sits three levels below the repository root (examples/extras/loraplus/),
+# so src/, data/ and saves/ are reached with ../../../ like the other extras examples.
+# All paths are relative: run this from examples/extras/loraplus/.
+
+CUDA_VISIBLE_DEVICES=0 python ../../../src/train_bash.py \
+    --stage sft \
+    --do_train \
+    --model_name_or_path meta-llama/Llama-2-7b-hf \
+    --dataset alpaca_gpt4_en,glaive_toolcall \
+    --dataset_dir ../../../data \
+    --template default \
+    --finetuning_type lora \
+    --lora_target q_proj,v_proj \
+    --loraplus_lr_ratio 16.0 \
+    --output_dir ../../../saves/LLaMA2-7B/loraplus/sft \
+    --overwrite_cache \
+    --overwrite_output_dir \
+    --cutoff_len 1024 \
+    --preprocessing_num_workers 16 \
+    --per_device_train_batch_size 1 \
+    --per_device_eval_batch_size 1 \
+    --gradient_accumulation_steps 8 \
+    --lr_scheduler_type cosine \
+    --logging_steps 10 \
+    --warmup_steps 20 \
+    --save_steps 100 \
+    --eval_steps 100 \
+    --evaluation_strategy steps \
+    --load_best_model_at_end \
+    --learning_rate 5e-5 \
+    --num_train_epochs 3.0 \
+    --max_samples 3000 \
+    --val_size 0.1 \
+    --plot_loss \
+    --fp16
--- a/examples/extras/mod/sft.sh
+++ b/examples/extras/mod/sft.sh
@ -0,0 +1,33 @@
+#!/bin/bash
+# Full-parameter SFT of meta-llama/Llama-2-7b-hf with --mixture_of_depths convert,
+# the paged_adamw_8bit optimizer and --pure_bf16, on GPU 0.
+# All paths are relative: run this from examples/extras/mod/.
+
+CUDA_VISIBLE_DEVICES=0 python ../../../src/train_bash.py \
+    --stage sft \
+    --do_train \
+    --model_name_or_path meta-llama/Llama-2-7b-hf \
+    --dataset alpaca_gpt4_en,glaive_toolcall \
+    --dataset_dir ../../../data \
+    --template default \
+    --finetuning_type full \
+    --mixture_of_depths convert \
+    --output_dir ../../../saves/LLaMA2-7B/mod/sft \
+    --overwrite_cache \
+    --overwrite_output_dir \
+    --cutoff_len 1024 \
+    --preprocessing_num_workers 16 \
+    --per_device_train_batch_size 1 \
+    --per_device_eval_batch_size 1 \
+    --gradient_accumulation_steps 8 \
+    --optim paged_adamw_8bit \
+    --lr_scheduler_type cosine \
+    --logging_steps 10 \
+    --warmup_steps 20 \
+    --save_steps 100 \
+    --eval_steps 100 \
+    --evaluation_strategy steps \
+    --load_best_model_at_end \
+    --learning_rate 5e-5 \
+    --num_train_epochs 3.0 \
+    --max_samples 3000 \
+    --val_size 0.1 \
+    --plot_loss \
+    --pure_bf16
--- a/examples/full_multi_gpu/multi_node.sh
+++ b/examples/full_multi_gpu/multi_node.sh
@ -0,0 +1,38 @@
+#!/bin/bash
+# Full-parameter SFT of meta-llama/Llama-2-7b-hf across several nodes, launched with
+# torch.distributed.run and the DeepSpeed config ../deepspeed/ds_z3_config.json.
+# NPROC_PER_NODE, NNODES, RANK, MASTER_ADDR and MASTER_PORT must be set in the environment;
+# the ${VAR:?} expansions abort with a clear message instead of passing an empty value,
+# which would otherwise shift the launcher's arguments.
+# All paths are relative: run this from examples/full_multi_gpu/.
+
+python -m torch.distributed.run \
+    --nproc_per_node "${NPROC_PER_NODE:?NPROC_PER_NODE must be set}" \
+    --nnodes "${NNODES:?NNODES must be set}" \
+    --node_rank "${RANK:?RANK must be set}" \
+    --master_addr "${MASTER_ADDR:?MASTER_ADDR must be set}" \
+    --master_port "${MASTER_PORT:?MASTER_PORT must be set}" \
+    ../../src/train_bash.py \
+    --deepspeed ../deepspeed/ds_z3_config.json \
+    --stage sft \
+    --do_train \
+    --model_name_or_path meta-llama/Llama-2-7b-hf \
+    --dataset alpaca_gpt4_en,glaive_toolcall \
+    --dataset_dir ../../data \
+    --template default \
+    --finetuning_type full \
+    --output_dir ../../saves/LLaMA2-7B/full/sft \
+    --overwrite_cache \
+    --overwrite_output_dir \
+    --cutoff_len 1024 \
+    --preprocessing_num_workers 16 \
+    --per_device_train_batch_size 1 \
+    --per_device_eval_batch_size 1 \
+    --gradient_accumulation_steps 2 \
+    --lr_scheduler_type cosine \
+    --logging_steps 10 \
+    --warmup_steps 20 \
+    --save_steps 100 \
+    --eval_steps 100 \
+    --evaluation_strategy steps \
+    --learning_rate 5e-5 \
+    --num_train_epochs 3.0 \
+    --max_samples 3000 \
+    --val_size 0.1 \
+    --ddp_timeout 180000000 \
+    --plot_loss \
+    --fp16
--- a/examples/full_multi_gpu/predict.sh
+++ b/examples/full_multi_gpu/predict.sh
@ -0,0 +1,20 @@
+#!/bin/bash
+# Prediction run (--do_predict, --predict_with_generate) on 20 samples, loading the model from
+# saves/LLaMA2-7B/full/sft (the output_dir of single_node.sh / multi_node.sh in this directory).
+# Launched through `accelerate launch` with ../accelerate/single_config.yaml on GPUs 0-3.
+# All paths are relative: run this from examples/full_multi_gpu/.
+
+CUDA_VISIBLE_DEVICES=0,1,2,3 accelerate launch \
+    --config_file ../accelerate/single_config.yaml \
+    ../../src/train_bash.py \
+    --stage sft \
+    --do_predict \
+    --model_name_or_path ../../saves/LLaMA2-7B/full/sft \
+    --dataset alpaca_gpt4_en,glaive_toolcall \
+    --dataset_dir ../../data \
+    --template default \
+    --finetuning_type full \
+    --output_dir ../../saves/LLaMA2-7B/full/predict \
+    --overwrite_cache \
+    --overwrite_output_dir \
+    --cutoff_len 1024 \
+    --preprocessing_num_workers 16 \
+    --per_device_eval_batch_size 1 \
+    --max_samples 20 \
+    --predict_with_generate
--- a/examples/full_multi_gpu/single_node.sh
+++ b/examples/full_multi_gpu/single_node.sh
@ -0,0 +1,32 @@
+#!/bin/bash
+# Full-parameter SFT of meta-llama/Llama-2-7b-hf on one node with 4 GPUs, started by the
+# `deepspeed` launcher with the config ../deepspeed/ds_z3_config.json.
+# All paths are relative: run this from examples/full_multi_gpu/.
+
+deepspeed --num_gpus 4 ../../src/train_bash.py \
+    --deepspeed ../deepspeed/ds_z3_config.json \
+    --stage sft \
+    --do_train \
+    --model_name_or_path meta-llama/Llama-2-7b-hf \
+    --dataset alpaca_gpt4_en,glaive_toolcall \
+    --dataset_dir ../../data \
+    --template default \
+    --finetuning_type full \
+    --output_dir ../../saves/LLaMA2-7B/full/sft \
+    --overwrite_cache \
+    --overwrite_output_dir \
+    --cutoff_len 1024 \
+    --preprocessing_num_workers 16 \
+    --per_device_train_batch_size 1 \
+    --per_device_eval_batch_size 1 \
+    --gradient_accumulation_steps 2 \
+    --lr_scheduler_type cosine \
+    --logging_steps 10 \
+    --warmup_steps 20 \
+    --save_steps 100 \
+    --eval_steps 100 \
+    --evaluation_strategy steps \
+    --learning_rate 5e-5 \
+    --num_train_epochs 3.0 \
+    --max_samples 3000 \
+    --val_size 0.1 \
+    --ddp_timeout 180000000 \
+    --plot_loss \
+    --fp16
--- a/examples/inference/api_demo.sh
+++ b/examples/inference/api_demo.sh
@ -0,0 +1,7 @@
+#!/bin/bash
+# Starts src/api_demo.py with API_PORT=8000 on GPU 0, loading meta-llama/Llama-2-7b-hf plus
+# the LoRA adapter in saves/LLaMA2-7B/lora/sft.
+# All paths are relative: run this from examples/inference/.
+
+CUDA_VISIBLE_DEVICES=0 API_PORT=8000 python ../../src/api_demo.py \
+    --model_name_or_path meta-llama/Llama-2-7b-hf \
+    --adapter_name_or_path ../../saves/LLaMA2-7B/lora/sft \
+    --template default \
+    --finetuning_type lora
--- a/examples/inference/cli_demo.sh
+++ b/examples/inference/cli_demo.sh
@ -0,0 +1,7 @@
+#!/bin/bash
+# Starts src/cli_demo.py on GPU 0, loading meta-llama/Llama-2-7b-hf plus the LoRA adapter
+# in saves/LLaMA2-7B/lora/sft.
+# All paths are relative: run this from examples/inference/.
+
+CUDA_VISIBLE_DEVICES=0 python ../../src/cli_demo.py \
+    --model_name_or_path meta-llama/Llama-2-7b-hf \
+    --adapter_name_or_path ../../saves/LLaMA2-7B/lora/sft \
+    --template default \
+    --finetuning_type lora
--- a/examples/inference/evaluate.sh
+++ b/examples/inference/evaluate.sh
@ -0,0 +1,12 @@
+#!/bin/bash
+# Runs src/evaluate.py on GPU 0 for meta-llama/Llama-2-7b-hf plus the LoRA adapter in
+# saves/LLaMA2-7B/lora/sft: task mmlu, test split, lang en, 5-shot, batch size 4,
+# using the fewshot template.
+# All paths are relative: run this from examples/inference/.
+
+CUDA_VISIBLE_DEVICES=0 python ../../src/evaluate.py \
+    --model_name_or_path meta-llama/Llama-2-7b-hf \
+    --adapter_name_or_path ../../saves/LLaMA2-7B/lora/sft \
+    --template fewshot \
+    --finetuning_type lora \
+    --task mmlu \
+    --split test \
+    --lang en \
+    --n_shot 5 \
+    --batch_size 4
--- a/examples/inference/web_demo.sh
+++ b/examples/inference/web_demo.sh
@ -0,0 +1,8 @@
+#!/bin/bash
+# Starts src/web_demo.py on GPU 0, loading meta-llama/Llama-2-7b-hf plus the LoRA adapter
+# in saves/LLaMA2-7B/lora/sft. All paths are relative: run this from examples/inference/.
+# add `--visual_inputs True` to load MLLM
+
+CUDA_VISIBLE_DEVICES=0 python ../../src/web_demo.py \
+    --model_name_or_path meta-llama/Llama-2-7b-hf \
+    --adapter_name_or_path ../../saves/LLaMA2-7B/lora/sft \
+    --template default \
+    --finetuning_type lora
--- a/examples/lora_multi_gpu/ds_zero3.sh
+++ b/examples/lora_multi_gpu/ds_zero3.sh
@ -0,0 +1,33 @@
+#!/bin/bash
+# LoRA SFT (q_proj,v_proj) of meta-llama/Llama-2-7b-hf on 4 GPUs, started by the `deepspeed`
+# launcher with the config ../deepspeed/ds_z3_config.json.
+# All paths are relative: run this from examples/lora_multi_gpu/.
+
+deepspeed --num_gpus 4 ../../src/train_bash.py \
+    --deepspeed ../deepspeed/ds_z3_config.json \
+    --stage sft \
+    --do_train \
+    --model_name_or_path meta-llama/Llama-2-7b-hf \
+    --dataset alpaca_gpt4_en,glaive_toolcall \
+    --dataset_dir ../../data \
+    --template default \
+    --finetuning_type lora \
+    --lora_target q_proj,v_proj \
+    --output_dir ../../saves/LLaMA2-7B/lora/sft \
+    --overwrite_cache \
+    --overwrite_output_dir \
+    --cutoff_len 1024 \
+    --preprocessing_num_workers 16 \
+    --per_device_train_batch_size 1 \
+    --per_device_eval_batch_size 1 \
+    --gradient_accumulation_steps 2 \
+    --lr_scheduler_type cosine \
+    --logging_steps 10 \
+    --warmup_steps 20 \
+    --save_steps 100 \
+    --eval_steps 100 \
+    --evaluation_strategy steps \
+    --learning_rate 5e-5 \
+    --num_train_epochs 3.0 \
+    --max_samples 3000 \
+    --val_size 0.1 \
+    --ddp_timeout 180000000 \
+    --plot_loss \
+    --fp16
--- a/examples/lora_multi_gpu/multi_node.sh
+++ b/examples/lora_multi_gpu/multi_node.sh
@ -0,0 +1,36 @@
+#!/bin/bash
+# LoRA SFT (q_proj,v_proj) of meta-llama/Llama-2-7b-hf, started with `accelerate launch`
+# using ../accelerate/master_config.yaml and GPUs 0-3 of this machine.
+# All paths are relative: run this from examples/lora_multi_gpu/.
+# also launch it on slave machine using slave_config.yaml
+
+CUDA_VISIBLE_DEVICES=0,1,2,3 accelerate launch \
+    --config_file ../accelerate/master_config.yaml \
+    ../../src/train_bash.py \
+    --stage sft \
+    --do_train \
+    --model_name_or_path meta-llama/Llama-2-7b-hf \
+    --dataset alpaca_gpt4_en,glaive_toolcall \
+    --dataset_dir ../../data \
+    --template default \
+    --finetuning_type lora \
+    --lora_target q_proj,v_proj \
+    --output_dir ../../saves/LLaMA2-7B/lora/sft \
+    --overwrite_cache \
+    --overwrite_output_dir \
+    --cutoff_len 1024 \
+    --preprocessing_num_workers 16 \
+    --per_device_train_batch_size 1 \
+    --per_device_eval_batch_size 1 \
+    --gradient_accumulation_steps 2 \
+    --lr_scheduler_type cosine \
+    --logging_steps 10 \
+    --warmup_steps 20 \
+    --save_steps 100 \
+    --eval_steps 100 \
+    --evaluation_strategy steps \
+    --load_best_model_at_end \
+    --learning_rate 5e-5 \
+    --num_train_epochs 3.0 \
+    --max_samples 3000 \
+    --val_size 0.1 \
+    --ddp_timeout 180000000 \
+    --plot_loss \
+    --fp16
--- a/examples/lora_multi_gpu/single_node.sh
+++ b/examples/lora_multi_gpu/single_node.sh
@ -0,0 +1,35 @@
+#!/bin/bash
+# LoRA SFT (q_proj,v_proj) of meta-llama/Llama-2-7b-hf on GPUs 0-3 of a single machine,
+# started with `accelerate launch` using ../accelerate/single_config.yaml.
+# All paths are relative: run this from examples/lora_multi_gpu/.
+
+CUDA_VISIBLE_DEVICES=0,1,2,3 accelerate launch \
+    --config_file ../accelerate/single_config.yaml \
+    ../../src/train_bash.py \
+    --stage sft \
+    --do_train \
+    --model_name_or_path meta-llama/Llama-2-7b-hf \
+    --dataset alpaca_gpt4_en,glaive_toolcall \
+    --dataset_dir ../../data \
+    --template default \
+    --finetuning_type lora \
+    --lora_target q_proj,v_proj \
+    --output_dir ../../saves/LLaMA2-7B/lora/sft \
+    --overwrite_cache \
+    --overwrite_output_dir \
+    --cutoff_len 1024 \
+    --preprocessing_num_workers 16 \
+    --per_device_train_batch_size 1 \
+    --per_device_eval_batch_size 1 \
+    --gradient_accumulation_steps 2 \
+    --lr_scheduler_type cosine \
+    --logging_steps 10 \
+    --warmup_steps 20 \
+    --save_steps 100 \
+    --eval_steps 100 \
+    --evaluation_strategy steps \
+    --load_best_model_at_end \
+    --learning_rate 5e-5 \
+    --num_train_epochs 3.0 \
+    --max_samples 3000 \
+    --val_size 0.1 \
+    --ddp_timeout 180000000 \
+    --plot_loss \
+    --fp16
--- a/examples/lora_single_gpu/dpo.sh
+++ b/examples/lora_single_gpu/dpo.sh
@ -0,0 +1,35 @@
+#!/bin/bash
+# DPO stage (--stage dpo, --dpo_ftx 1.0) on the orca_rlhf dataset, GPU 0.
+# Loads the adapter in saves/LLaMA2-7B/lora/sft (the output_dir of sft.sh) with
+# --create_new_adapter and writes to saves/LLaMA2-7B/lora/dpo.
+# All paths are relative: run this from examples/lora_single_gpu/.
+
+CUDA_VISIBLE_DEVICES=0 python ../../src/train_bash.py \
+    --stage dpo \
+    --do_train \
+    --model_name_or_path meta-llama/Llama-2-7b-hf \
+    --adapter_name_or_path ../../saves/LLaMA2-7B/lora/sft \
+    --create_new_adapter \
+    --dataset orca_rlhf \
+    --dataset_dir ../../data \
+    --template default \
+    --finetuning_type lora \
+    --lora_target q_proj,v_proj \
+    --output_dir ../../saves/LLaMA2-7B/lora/dpo \
+    --overwrite_cache \
+    --overwrite_output_dir \
+    --cutoff_len 1024 \
+    --preprocessing_num_workers 16 \
+    --per_device_train_batch_size 1 \
+    --per_device_eval_batch_size 1 \
+    --gradient_accumulation_steps 8 \
+    --lr_scheduler_type cosine \
+    --logging_steps 10 \
+    --warmup_steps 20 \
+    --save_steps 100 \
+    --eval_steps 100 \
+    --evaluation_strategy steps \
+    --load_best_model_at_end \
+    --learning_rate 1e-5 \
+    --num_train_epochs 1.0 \
+    --max_samples 1000 \
+    --val_size 0.1 \
+    --dpo_ftx 1.0 \
+    --plot_loss \
+    --fp16
--- a/examples/lora_single_gpu/orpo.sh
+++ b/examples/lora_single_gpu/orpo.sh
@ -0,0 +1,32 @@
+#!/bin/bash
+# ORPO stage (--stage orpo) with LoRA (q_proj,v_proj) on the orca_rlhf dataset, GPU 0.
+# No adapter is passed in: training starts from meta-llama/Llama-2-7b-hf directly.
+# All paths are relative: run this from examples/lora_single_gpu/.
+
+CUDA_VISIBLE_DEVICES=0 python ../../src/train_bash.py \
+    --stage orpo \
+    --do_train \
+    --model_name_or_path meta-llama/Llama-2-7b-hf \
+    --dataset orca_rlhf \
+    --dataset_dir ../../data \
+    --template default \
+    --finetuning_type lora \
+    --lora_target q_proj,v_proj \
+    --output_dir ../../saves/LLaMA2-7B/lora/orpo \
+    --overwrite_cache \
+    --overwrite_output_dir \
+    --cutoff_len 1024 \
+    --preprocessing_num_workers 16 \
+    --per_device_train_batch_size 1 \
+    --per_device_eval_batch_size 1 \
+    --gradient_accumulation_steps 8 \
+    --lr_scheduler_type cosine \
+    --logging_steps 10 \
+    --warmup_steps 20 \
+    --save_steps 100 \
+    --eval_steps 100 \
+    --evaluation_strategy steps \
+    --load_best_model_at_end \
+    --learning_rate 1e-5 \
+    --num_train_epochs 1.0 \
+    --max_samples 1000 \
+    --val_size 0.1 \
+    --plot_loss \
+    --fp16
--- a/examples/lora_single_gpu/ppo.sh
+++ b/examples/lora_single_gpu/ppo.sh
@ -0,0 +1,32 @@
+#!/bin/bash
+# PPO stage (--stage ppo) on alpaca_gpt4_en, GPU 0.
+# Needs two earlier outputs: the adapter in saves/LLaMA2-7B/lora/sft (output_dir of sft.sh)
+# and the reward model in saves/LLaMA2-7B/lora/reward (output_dir of reward.sh).
+# Generation settings passed here: top_k 0, top_p 0.9, max_new_tokens 256.
+# All paths are relative: run this from examples/lora_single_gpu/.
+
+CUDA_VISIBLE_DEVICES=0 python ../../src/train_bash.py \
+    --stage ppo \
+    --do_train \
+    --model_name_or_path meta-llama/Llama-2-7b-hf \
+    --adapter_name_or_path ../../saves/LLaMA2-7B/lora/sft \
+    --create_new_adapter \
+    --dataset alpaca_gpt4_en \
+    --dataset_dir ../../data \
+    --template default \
+    --finetuning_type lora \
+    --lora_target q_proj,v_proj \
+    --reward_model ../../saves/LLaMA2-7B/lora/reward \
+    --output_dir ../../saves/LLaMA2-7B/lora/ppo \
+    --overwrite_cache \
+    --overwrite_output_dir \
+    --cutoff_len 512 \
+    --preprocessing_num_workers 16 \
+    --per_device_train_batch_size 1 \
+    --gradient_accumulation_steps 8 \
+    --lr_scheduler_type cosine \
+    --logging_steps 10 \
+    --save_steps 100 \
+    --learning_rate 1e-5 \
+    --num_train_epochs 1.0 \
+    --max_samples 1000 \
+    --top_k 0 \
+    --top_p 0.9 \
+    --max_new_tokens 256 \
+    --plot_loss \
+    --fp16
--- a/examples/lora_single_gpu/predict.sh
+++ b/examples/lora_single_gpu/predict.sh
@ -0,0 +1,19 @@
+#!/bin/bash
+# Prediction run (--do_predict, --predict_with_generate) on 20 samples, GPU 0.
+# Two adapters are passed as a comma-separated list: saves/LLaMA2-7B/lora/sft and
+# saves/LLaMA2-7B/lora/dpo (the output dirs of sft.sh and dpo.sh), so both must exist.
+# All paths are relative: run this from examples/lora_single_gpu/.
+
+CUDA_VISIBLE_DEVICES=0 python ../../src/train_bash.py \
+    --stage sft \
+    --do_predict \
+    --model_name_or_path meta-llama/Llama-2-7b-hf \
+    --adapter_name_or_path ../../saves/LLaMA2-7B/lora/sft,../../saves/LLaMA2-7B/lora/dpo \
+    --dataset alpaca_gpt4_en,glaive_toolcall \
+    --dataset_dir ../../data \
+    --template default \
+    --finetuning_type lora \
+    --output_dir ../../saves/LLaMA2-7B/lora/predict \
+    --overwrite_cache \
+    --overwrite_output_dir \
+    --cutoff_len 1024 \
+    --preprocessing_num_workers 16 \
+    --per_device_eval_batch_size 1 \
+    --max_samples 20 \
+    --predict_with_generate
--- a/examples/lora_single_gpu/prepare.sh
+++ b/examples/lora_single_gpu/prepare.sh
@ -0,0 +1,18 @@
+#!/bin/bash
+# Invokes train_bash.py with the SFT data settings and --tokenized_path ../../saves/datasets/sft.
+# CUDA_VISIBLE_DEVICES is set to an empty value for this command, so no GPU is exposed to it.
+# NOTE(review): presumably this only tokenizes the dataset and stores it at tokenized_path
+# for later runs -- confirm against the handling of --tokenized_path in src/.
+# All paths are relative: run this from examples/lora_single_gpu/.
+
+CUDA_VISIBLE_DEVICES= python ../../src/train_bash.py \
+    --stage sft \
+    --do_train \
+    --model_name_or_path meta-llama/Llama-2-7b-hf \
+    --dataset alpaca_gpt4_en,glaive_toolcall \
+    --dataset_dir ../../data \
+    --template default \
+    --finetuning_type lora \
+    --lora_target q_proj,v_proj \
+    --output_dir ../../saves/LLaMA2-7B/lora/sft \
+    --overwrite_cache \
+    --overwrite_output_dir \
+    --cutoff_len 1024 \
+    --preprocessing_num_workers 16 \
+    --max_samples 3000 \
+    --tokenized_path ../../saves/datasets/sft
--- a/examples/lora_single_gpu/pretrain.sh
+++ b/examples/lora_single_gpu/pretrain.sh
@ -0,0 +1,31 @@
+#!/bin/bash
+# Pre-training stage (--stage pt) with LoRA (q_proj,v_proj) on the c4_demo dataset, GPU 0.
+# No --template is passed for this stage.
+# All paths are relative: run this from examples/lora_single_gpu/.
+
+CUDA_VISIBLE_DEVICES=0 python ../../src/train_bash.py \
+    --stage pt \
+    --do_train \
+    --model_name_or_path meta-llama/Llama-2-7b-hf \
+    --dataset c4_demo \
+    --dataset_dir ../../data \
+    --finetuning_type lora \
+    --lora_target q_proj,v_proj \
+    --output_dir ../../saves/LLaMA2-7B/lora/pretrain \
+    --overwrite_cache \
+    --overwrite_output_dir \
+    --cutoff_len 1024 \
+    --preprocessing_num_workers 16 \
+    --per_device_train_batch_size 1 \
+    --per_device_eval_batch_size 1 \
+    --gradient_accumulation_steps 8 \
+    --lr_scheduler_type cosine \
+    --logging_steps 10 \
+    --warmup_steps 20 \
+    --save_steps 100 \
+    --eval_steps 100 \
+    --evaluation_strategy steps \
+    --load_best_model_at_end \
+    --learning_rate 5e-5 \
+    --num_train_epochs 3.0 \
+    --max_samples 10000 \
+    --val_size 0.1 \
+    --plot_loss \
+    --fp16
--- a/examples/lora_single_gpu/reward.sh
+++ b/examples/lora_single_gpu/reward.sh
@ -0,0 +1,33 @@
+#!/bin/bash
+# Reward-model stage (--stage rm) on the orca_rlhf dataset, GPU 0.
+# Loads the adapter in saves/LLaMA2-7B/lora/sft (output_dir of sft.sh) with
+# --create_new_adapter and writes to saves/LLaMA2-7B/lora/reward, which ppo.sh reads
+# through --reward_model.
+# All paths are relative: run this from examples/lora_single_gpu/.
+
+CUDA_VISIBLE_DEVICES=0 python ../../src/train_bash.py \
+    --stage rm \
+    --do_train \
+    --model_name_or_path meta-llama/Llama-2-7b-hf \
+    --adapter_name_or_path ../../saves/LLaMA2-7B/lora/sft \
+    --create_new_adapter \
+    --dataset orca_rlhf \
+    --dataset_dir ../../data \
+    --template default \
+    --finetuning_type lora \
+    --lora_target q_proj,v_proj \
+    --output_dir ../../saves/LLaMA2-7B/lora/reward \
+    --overwrite_cache \
+    --overwrite_output_dir \
+    --cutoff_len 1024 \
+    --preprocessing_num_workers 16 \
+    --per_device_train_batch_size 1 \
+    --per_device_eval_batch_size 1 \
+    --gradient_accumulation_steps 8 \
+    --lr_scheduler_type cosine \
+    --logging_steps 10 \
+    --warmup_steps 20 \
+    --save_steps 100 \
+    --eval_steps 100 \
+    --evaluation_strategy steps \
+    --learning_rate 1e-5 \
+    --num_train_epochs 1.0 \
+    --max_samples 5000 \
+    --val_size 0.1 \
+    --plot_loss \
+    --fp16
--- a/examples/lora_single_gpu/sft.sh
+++ b/examples/lora_single_gpu/sft.sh
@ -0,0 +1,32 @@
+#!/bin/bash
+# LoRA SFT (q_proj,v_proj) of meta-llama/Llama-2-7b-hf on alpaca_gpt4_en and glaive_toolcall,
+# GPU 0. The output dir saves/LLaMA2-7B/lora/sft is what dpo.sh, ppo.sh, reward.sh, predict.sh
+# and the inference/merge examples pass as --adapter_name_or_path.
+# All paths are relative: run this from examples/lora_single_gpu/.
+
+CUDA_VISIBLE_DEVICES=0 python ../../src/train_bash.py \
+    --stage sft \
+    --do_train \
+    --model_name_or_path meta-llama/Llama-2-7b-hf \
+    --dataset alpaca_gpt4_en,glaive_toolcall \
+    --dataset_dir ../../data \
+    --template default \
+    --finetuning_type lora \
+    --lora_target q_proj,v_proj \
+    --output_dir ../../saves/LLaMA2-7B/lora/sft \
+    --overwrite_cache \
+    --overwrite_output_dir \
+    --cutoff_len 1024 \
+    --preprocessing_num_workers 16 \
+    --per_device_train_batch_size 1 \
+    --per_device_eval_batch_size 1 \
+    --gradient_accumulation_steps 8 \
+    --lr_scheduler_type cosine \
+    --logging_steps 10 \
+    --warmup_steps 20 \
+    --save_steps 100 \
+    --eval_steps 100 \
+    --evaluation_strategy steps \
+    --load_best_model_at_end \
+    --learning_rate 5e-5 \
+    --num_train_epochs 3.0 \
+    --max_samples 3000 \
+    --val_size 0.1 \
+    --plot_loss \
+    --fp16
--- a/examples/lora_single_gpu/sft_mllm.sh
+++ b/examples/lora_single_gpu/sft_mllm.sh
@ -0,0 +1,33 @@
+#!/bin/bash
+# LoRA SFT (q_proj,v_proj) of llava-hf/llava-1.5-7b-hf with --visual_inputs on the mllm_demo
+# dataset, vicuna template, 100 epochs, GPU 0.
+# NOTE(review): output_dir sits under saves/LLaMA2-7B/ although the base model here is
+# LLaVA-1.5 -- confirm that the directory name is intended.
+# All paths are relative: run this from examples/lora_single_gpu/.
+
+CUDA_VISIBLE_DEVICES=0 python ../../src/train_bash.py \
+    --stage sft \
+    --do_train \
+    --model_name_or_path llava-hf/llava-1.5-7b-hf \
+    --visual_inputs \
+    --dataset mllm_demo \
+    --dataset_dir ../../data \
+    --template vicuna \
+    --finetuning_type lora \
+    --lora_target q_proj,v_proj \
+    --output_dir ../../saves/LLaMA2-7B/lora/sft_mllm \
+    --overwrite_cache \
+    --overwrite_output_dir \
+    --cutoff_len 1024 \
+    --preprocessing_num_workers 16 \
+    --per_device_train_batch_size 1 \
+    --per_device_eval_batch_size 1 \
+    --gradient_accumulation_steps 8 \
+    --lr_scheduler_type cosine \
+    --logging_steps 10 \
+    --warmup_steps 20 \
+    --save_steps 100 \
+    --eval_steps 100 \
+    --evaluation_strategy steps \
+    --load_best_model_at_end \
+    --learning_rate 5e-5 \
+    --num_train_epochs 100.0 \
+    --max_samples 3000 \
+    --val_size 0.1 \
+    --plot_loss \
+    --fp16
--- a/examples/merge_lora/merge.sh
+++ b/examples/merge_lora/merge.sh
@ -0,0 +1,12 @@
+#!/bin/bash
+# Runs src/export_model.py on meta-llama/Llama-2-7b-hf plus the LoRA adapter in
+# saves/LLaMA2-7B/lora/sft and exports to models/llama2-7b-sft (the input of quantize.sh),
+# with --export_device cpu, --export_size 2 and --export_legacy_format False.
+# All paths are relative: run this from examples/merge_lora/.
+# DO NOT use quantized model or quantization_bit when merging lora weights
+
+CUDA_VISIBLE_DEVICES=0 python ../../src/export_model.py \
+    --model_name_or_path meta-llama/Llama-2-7b-hf \
+    --adapter_name_or_path ../../saves/LLaMA2-7B/lora/sft \
+    --template default \
+    --finetuning_type lora \
+    --export_dir ../../models/llama2-7b-sft \
+    --export_size 2 \
+    --export_device cpu \
+    --export_legacy_format False
--- a/examples/merge_lora/quantize.sh
+++ b/examples/merge_lora/quantize.sh
@ -0,0 +1,11 @@
+#!/bin/bash
+# Runs src/export_model.py on models/llama2-7b-sft with --export_quantization_bit 4 and
+# data/c4_demo.json as the quantization dataset; exports to models/llama2-7b-sft-int4.
+# All paths are relative: run this from examples/merge_lora/.
+# NEED TO run `merge.sh` before using this script
+
+CUDA_VISIBLE_DEVICES=0 python ../../src/export_model.py \
+    --model_name_or_path ../../models/llama2-7b-sft \
+    --template default \
+    --export_dir ../../models/llama2-7b-sft-int4 \
+    --export_quantization_bit 4 \
+    --export_quantization_dataset ../../data/c4_demo.json \
+    --export_size 2 \
+    --export_legacy_format False
--- a/examples/qlora_single_gpu/aqlm.sh
+++ b/examples/qlora_single_gpu/aqlm.sh
@ -0,0 +1,30 @@
+#!/bin/bash
+# LoRA SFT (q_proj,v_proj) with BlackSamorez/Llama-2-7b-AQLM-2Bit-1x16-hf as the base model, GPU 0.
+# NOTE: output_dir is saves/LLaMA2-7B/lora/sft, the same directory lora_single_gpu/sft.sh
+# writes to, and --overwrite_output_dir is set.
+# All paths are relative: run this from examples/qlora_single_gpu/.
+
+CUDA_VISIBLE_DEVICES=0 python ../../src/train_bash.py \
+    --stage sft \
+    --do_train \
+    --model_name_or_path BlackSamorez/Llama-2-7b-AQLM-2Bit-1x16-hf \
+    --dataset alpaca_gpt4_en,glaive_toolcall \
+    --dataset_dir ../../data \
+    --template default \
+    --finetuning_type lora \
+    --lora_target q_proj,v_proj \
+    --output_dir ../../saves/LLaMA2-7B/lora/sft \
+    --overwrite_cache \
+    --overwrite_output_dir \
+    --cutoff_len 1024 \
+    --per_device_train_batch_size 1 \
+    --per_device_eval_batch_size 1 \
+    --gradient_accumulation_steps 8 \
+    --lr_scheduler_type cosine \
+    --logging_steps 10 \
+    --save_steps 100 \
+    --eval_steps 100 \
+    --evaluation_strategy steps \
+    --load_best_model_at_end \
+    --learning_rate 5e-5 \
+    --num_train_epochs 3.0 \
+    --max_samples 3000 \
+    --val_size 0.1 \
+    --plot_loss \
+    --fp16
--- a/examples/qlora_single_gpu/awq.sh
+++ b/examples/qlora_single_gpu/awq.sh
@ -0,0 +1,30 @@
+#!/bin/bash
+# LoRA SFT (q_proj,v_proj) with TheBloke/Llama-2-7B-AWQ as the base model, GPU 0.
+# NOTE: output_dir is saves/LLaMA2-7B/lora/sft, the same directory lora_single_gpu/sft.sh
+# writes to, and --overwrite_output_dir is set.
+# All paths are relative: run this from examples/qlora_single_gpu/.
+
+CUDA_VISIBLE_DEVICES=0 python ../../src/train_bash.py \
+    --stage sft \
+    --do_train \
+    --model_name_or_path TheBloke/Llama-2-7B-AWQ \
+    --dataset alpaca_gpt4_en,glaive_toolcall \
+    --dataset_dir ../../data \
+    --template default \
+    --finetuning_type lora \
+    --lora_target q_proj,v_proj \
+    --output_dir ../../saves/LLaMA2-7B/lora/sft \
+    --overwrite_cache \
+    --overwrite_output_dir \
+    --cutoff_len 1024 \
+    --per_device_train_batch_size 1 \
+    --per_device_eval_batch_size 1 \
+    --gradient_accumulation_steps 8 \
+    --lr_scheduler_type cosine \
+    --logging_steps 10 \
+    --save_steps 100 \
+    --eval_steps 100 \
+    --evaluation_strategy steps \
+    --load_best_model_at_end \
+    --learning_rate 5e-5 \
+    --num_train_epochs 3.0 \
+    --max_samples 3000 \
+    --val_size 0.1 \
+    --plot_loss \
+    --fp16
--- a/examples/qlora_single_gpu/bitsandbytes.sh
+++ b/examples/qlora_single_gpu/bitsandbytes.sh
@ -0,0 +1,31 @@
+#!/bin/bash
+# LoRA SFT (q_proj,v_proj) of meta-llama/Llama-2-7b-hf with --quantization_bit 4, GPU 0.
+# NOTE: output_dir is saves/LLaMA2-7B/lora/sft, the same directory lora_single_gpu/sft.sh
+# writes to, and --overwrite_output_dir is set.
+# All paths are relative: run this from examples/qlora_single_gpu/.
+
+CUDA_VISIBLE_DEVICES=0 python ../../src/train_bash.py \
+    --stage sft \
+    --do_train \
+    --model_name_or_path meta-llama/Llama-2-7b-hf \
+    --dataset alpaca_gpt4_en,glaive_toolcall \
+    --dataset_dir ../../data \
+    --template default \
+    --finetuning_type lora \
+    --lora_target q_proj,v_proj \
+    --output_dir ../../saves/LLaMA2-7B/lora/sft \
+    --overwrite_cache \
+    --overwrite_output_dir \
+    --cutoff_len 1024 \
+    --per_device_train_batch_size 1 \
+    --per_device_eval_batch_size 1 \
+    --gradient_accumulation_steps 8 \
+    --lr_scheduler_type cosine \
+    --logging_steps 10 \
+    --save_steps 100 \
+    --eval_steps 100 \
+    --evaluation_strategy steps \
+    --load_best_model_at_end \
+    --learning_rate 5e-5 \
+    --num_train_epochs 3.0 \
+    --max_samples 3000 \
+    --val_size 0.1 \
+    --quantization_bit 4 \
+    --plot_loss \
+    --fp16
--- a/examples/qlora_single_gpu/gptq.sh
+++ b/examples/qlora_single_gpu/gptq.sh
@ -0,0 +1,30 @@
+#!/bin/bash
+# LoRA SFT (q_proj,v_proj) with TheBloke/Llama-2-7B-GPTQ as the base model, GPU 0.
+# NOTE: output_dir is saves/LLaMA2-7B/lora/sft, the same directory lora_single_gpu/sft.sh
+# writes to, and --overwrite_output_dir is set.
+# All paths are relative: run this from examples/qlora_single_gpu/.
+
+CUDA_VISIBLE_DEVICES=0 python ../../src/train_bash.py \
+    --stage sft \
+    --do_train \
+    --model_name_or_path TheBloke/Llama-2-7B-GPTQ \
+    --dataset alpaca_gpt4_en,glaive_toolcall \
+    --dataset_dir ../../data \
+    --template default \
+    --finetuning_type lora \
+    --lora_target q_proj,v_proj \
+    --output_dir ../../saves/LLaMA2-7B/lora/sft \
+    --overwrite_cache \
+    --overwrite_output_dir \
+    --cutoff_len 1024 \
+    --per_device_train_batch_size 1 \
+    --per_device_eval_batch_size 1 \
+    --gradient_accumulation_steps 8 \
+    --lr_scheduler_type cosine \
+    --logging_steps 10 \
+    --save_steps 100 \
+    --eval_steps 100 \
+    --evaluation_strategy steps \
+    --load_best_model_at_end \
+    --learning_rate 5e-5 \
+    --num_train_epochs 3.0 \
+    --max_samples 3000 \
+    --val_size 0.1 \
+    --plot_loss \
+    --fp16
--- a/pyproject.toml
+++ b/pyproject.toml
@ -0,0 +1,33 @@
+# Packaging is built with setuptools (version 61.0 or newer).
+[build-system]
+requires = ["setuptools>=61.0"]
+build-backend = "setuptools.build_meta"
+
+# Shared Ruff settings: py38 target, 119-character lines, 4-space indentation.
+[tool.ruff]
+target-version = "py38"
+line-length = 119
+indent-width = 4
+
+# Linting: rule families listed in `select` are enabled; codes in `ignore` are switched off.
+[tool.ruff.lint]
+ignore = ["C408", "C901", "E501", "E731", "E741", "W605"]
+select = ["C", "E", "F", "I", "W"]
+
+# Import sorting: `llmtuner` is classed as first-party, the listed libraries as third-party,
+# and two blank lines follow each import block.
+[tool.ruff.lint.isort]
+lines-after-imports = 2
+known-first-party = ["llmtuner"]
+known-third-party = [
+    "accelerate",
+    "datasets",
+    "gradio",
+    "numpy",
+    "peft",
+    "torch",
+    "transformers",
+    "trl"
+]
+
+# Formatter: double quotes, space indentation, code inside docstrings is formatted as well.
+[tool.ruff.format]
+quote-style = "double"
+indent-style = "space"
+docstring-code-format = true
+skip-magic-trailing-comma = false
+line-ending = "auto"
--- a/requirements.txt
+++ b/requirements.txt
@ -0,0 +1,18 @@
+torch>=1.13.1
+transformers>=4.37.2
+datasets>=2.14.3
+accelerate>=0.27.2
+peft>=0.10.0
+trl>=0.8.1
+gradio>=4.0.0
+scipy
+einops
+sentencepiece
+protobuf
+uvicorn
+pydantic
+fastapi
+sse-starlette
+matplotlib
+fire
+packaging
--- a/scripts/cal_flops.py
+++ b/scripts/cal_flops.py
@ -0,0 +1,33 @@
+# coding=utf-8
+# Calculates the flops of pre-trained models.
+# Usage: python cal_flops.py --model_name_or_path path_to_model --batch_size 1 --seq_length 512
+# Inspired by: https://www.deepspeed.ai/tutorials/flops-profiler/
+
+from typing import Optional
+
+import fire
+import torch
+from deepspeed.accelerator import get_accelerator  # type: ignore
+from deepspeed.profiling.flops_profiler import get_model_profile  # type: ignore
+
+from llmtuner import ChatModel
+
+
+def calculate_flops(
+    model_name_or_path: str,
+    batch_size: Optional[int] = 1,
+    seq_length: Optional[int] = 256,
+    flash_attn: Optional[bool] = False,
+):
+    """
+    Loads the model through ChatModel and prints the FLOPs, MACs and parameter count that
+    DeepSpeed's `get_model_profile` reports for one dummy batch.
+
+    Args:
+        model_name_or_path: model identifier or path passed to ChatModel.
+        batch_size: first dimension of the dummy input.
+        seq_length: second dimension of the dummy input.
+        flash_attn: forwarded to ChatModel as `flash_attn`.
+    """
+    with get_accelerator().device(0):
+        model_kwargs = {"model_name_or_path": model_name_or_path, "template": "vanilla", "flash_attn": flash_attn}
+        model = ChatModel(model_kwargs).model
+        # The dummy batch is all ones; the same ids (cloned) are passed as labels.
+        token_ids = torch.ones((batch_size, seq_length), dtype=torch.long, device=model.device)
+        profile_inputs = {"input_ids": token_ids, "labels": token_ids.clone()}
+        flops, macs, params = get_model_profile(model, kwargs=profile_inputs, print_profile=True, detailed=True)
+        for label, value in (("FLOPs:", flops), ("MACs:", macs), ("Params:", params)):
+            print(label, value)
+
+
+if __name__ == "__main__":
+    fire.Fire(calculate_flops)
--- a/scripts/cal_lr.py
+++ b/scripts/cal_lr.py
@ -0,0 +1,78 @@
+# coding=utf-8
+# Calculates the optimal learning rate for 7B/13B models using LLaMA's hyper-parameters.
+# Usage: python cal_lr.py --model_name_or_path path_to_model --dataset alpaca_en --cutoff_len 1024 --batch_size 16
+# Inspired by: https://github.com/imoneoi/openchat/blob/master/ochat/training_deepspeed/train.py
+
+import math
+from typing import Optional
+
+import fire
+import torch
+from torch.utils.data import DataLoader
+from tqdm import tqdm
+from transformers import DataCollatorForLanguageModeling, DataCollatorForSeq2Seq
+
+from llmtuner.data import get_dataset
+from llmtuner.extras.constants import IGNORE_INDEX
+from llmtuner.hparams import get_train_args
+from llmtuner.model import load_tokenizer
+
+
+BASE_LR = 3e-4  # 1.5e-4 for 30B-70B models
+BASE_BS = 4_000_000  # from llama paper
+
+
+def calculate_lr(
+    model_name_or_path: str,
+    batch_size: int,  # total batch size, namely (batch size * gradient accumulation * world size)
+    stage: Optional[str] = "sft",
+    dataset: Optional[str] = "alpaca_en",
+    dataset_dir: Optional[str] = "data",
+    template: Optional[str] = "default",
+    cutoff_len: Optional[int] = 1024,  # i.e. maximum input length during training
+    is_mistral: Optional[bool] = False,  # mistral model uses a smaller learning rate
+):
+    """
+    Prints a suggested learning rate for the given dataset and total batch size.
+
+    The dataset is tokenized and batched once to measure the share of label tokens that are
+    not IGNORE_INDEX. The rate is then BASE_LR * sqrt(valid tokens per batch / BASE_BS),
+    divided by 6 when `is_mistral` is set.
+
+    Raises:
+        NotImplementedError: if `stage` is neither "pt" nor "sft".
+        ValueError: if the dataset yields no tokens, so the valid-token ratio is undefined.
+    """
+    model_args, data_args, training_args, _, _ = get_train_args(
+        dict(
+            stage=stage,
+            model_name_or_path=model_name_or_path,
+            dataset=dataset,
+            dataset_dir=dataset_dir,
+            template=template,
+            cutoff_len=cutoff_len,
+            output_dir="dummy_dir",
+            overwrite_cache=True,
+        )
+    )
+    tokenizer_module = load_tokenizer(model_args)
+    tokenizer = tokenizer_module["tokenizer"]
+    trainset = get_dataset(model_args, data_args, training_args, stage, **tokenizer_module)
+    if stage == "pt":
+        data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)
+    elif stage == "sft":
+        data_collator = DataCollatorForSeq2Seq(tokenizer=tokenizer, label_pad_token_id=IGNORE_INDEX)
+    else:
+        raise NotImplementedError
+
+    dataloader = DataLoader(
+        dataset=trainset, batch_size=batch_size, shuffle=True, collate_fn=data_collator, pin_memory=True
+    )
+    valid_tokens, total_tokens = 0, 0
+    for batch in tqdm(dataloader):
+        valid_tokens += torch.sum(batch["labels"] != IGNORE_INDEX).item()
+        total_tokens += torch.numel(batch["labels"])
+
+    # An empty dataset would otherwise surface as a bare ZeroDivisionError below.
+    if total_tokens == 0:
+        raise ValueError("No tokens found in dataset `{}`, cannot estimate a learning rate.".format(dataset))
+
+    batch_max_len = cutoff_len * batch_size  # max tokens in a batch
+    valid_ratio = valid_tokens / total_tokens
+    batch_valid_len = batch_max_len * valid_ratio
+    lr = BASE_LR * math.sqrt(batch_valid_len / BASE_BS)  # lr ~ sqrt(batch_size)
+    lr = lr / 6.0 if is_mistral else lr
+    print(
+        "Optimal learning rate is {:.2e} for valid ratio% {:.2f} and effective batch size {:.2f}".format(
+            lr, valid_ratio * 100, batch_valid_len
+        )
+    )
+
+
+if __name__ == "__main__":
+    fire.Fire(calculate_lr)
--- a/scripts/length_cdf.py
+++ b/scripts/length_cdf.py
@ -0,0 +1,52 @@
+# coding=utf-8
+# Calculates the distribution of the input lengths in the dataset.
+# Usage: python length_cdf.py --model_name_or_path path_to_model --dataset alpaca_en --template default
+
+from collections import defaultdict
+from typing import Optional
+
+import fire
+from tqdm import tqdm
+
+from llmtuner.data import get_dataset
+from llmtuner.hparams import get_train_args
+from llmtuner.model import load_tokenizer
+
+
+def length_cdf(
+    model_name_or_path: str,
+    dataset: Optional[str] = "alpaca_en",
+    dataset_dir: Optional[str] = "data",
+    template: Optional[str] = "default",
+    interval: Optional[int] = 1000,
+):
+    """
+    Prints the cumulative distribution of `input_ids` lengths in the dataset.
+
+    Samples are grouped into buckets of width `interval`; one line is printed per non-empty
+    bucket with the running sample count and percentage below the bucket's upper bound.
+    The dataset is loaded with stage "sft" and a cutoff length of 1,000,000.
+    """
+    train_kwargs = dict(
+        stage="sft",
+        model_name_or_path=model_name_or_path,
+        dataset=dataset,
+        dataset_dir=dataset_dir,
+        template=template,
+        cutoff_len=1_000_000,
+        output_dir="dummy_dir",
+        overwrite_cache=True,
+    )
+    model_args, data_args, training_args, _, _ = get_train_args(train_kwargs)
+    tokenizer_module = load_tokenizer(model_args)
+    trainset = get_dataset(model_args, data_args, training_args, stage="sft", **tokenizer_module)
+    total_num = len(trainset)
+
+    # Key each bucket by its lower bound, i.e. the length rounded down to a multiple of interval.
+    bucket_counts = defaultdict(int)
+    for input_ids in tqdm(trainset["input_ids"]):
+        bucket_start = len(input_ids) // interval * interval
+        bucket_counts[bucket_start] += 1
+
+    count_accu, prob_accu = 0, 0
+    for bucket_start, count in sorted(bucket_counts.items()):
+        count_accu += count
+        prob_accu += count / total_num * 100
+        print("{:d} ({:.2f}%) samples have length < {}.".format(count_accu, prob_accu, bucket_start + interval))
+
+
+if __name__ == "__main__":
+    fire.Fire(length_cdf)
--- a/scripts/llama_pro.py
+++ b/scripts/llama_pro.py
@ -0,0 +1,115 @@
+# coding=utf-8
+# Performs block expansion for LLaMA, Mistral or Qwen1.5 models.
+# Usage: python llama_pro.py --model_name_or_path meta-llama/Llama-2-7b-hf --output_dir llama2_pro --num_expand 8
+# Inspired by: https://github.com/TencentARC/LLaMA-Pro/blob/main/scripts/block_expansion.py
+
+import json
+import os
+from collections import OrderedDict
+from typing import TYPE_CHECKING, Optional
+
+import fire
+import torch
+from safetensors.torch import save_file
+from tqdm import tqdm
+from transformers import AutoConfig, AutoModelForCausalLM, AutoTokenizer
+from transformers.modeling_utils import (
+    SAFE_WEIGHTS_INDEX_NAME,
+    SAFE_WEIGHTS_NAME,
+    WEIGHTS_INDEX_NAME,
+    WEIGHTS_NAME,
+    shard_checkpoint,
+)
+
+
+if TYPE_CHECKING:
+    from transformers import PretrainedConfig, PreTrainedModel
+
+
+def change_name(name: str, old_index: int, new_index: int) -> str:
+    r"""
+    Returns `name` with every ".<old_index>." segment replaced by ".<new_index>.".
+
+    Names that do not contain the old segment are returned unchanged.
+    """
+    old_segment = f".{old_index:d}."
+    new_segment = f".{new_index:d}."
+    return name.replace(old_segment, new_segment)
+
+
+def block_expansion(
+    model_name_or_path: str,
+    output_dir: str,
+    num_expand: int,
+    shard_size: Optional[str] = "2GB",
+    save_safetensors: Optional[bool] = False,
+):
+    r"""
+    Expands a causal LM by inserting `num_expand` extra decoder layers and saves the result.
+
+    The original layers are split into `num_expand` equal groups. After the last layer of each
+    group a copy of that layer is inserted whose `down_proj` and `o_proj` tensors are zeroed,
+    while all of its other tensors are cloned from the source layer.
+
+    Args:
+        model_name_or_path: model to expand (passed to the `from_pretrained` loaders).
+        output_dir: directory receiving the new config, tokenizer and weight shards.
+        num_expand: number of layers to add; must be positive and divide `num_hidden_layers`.
+        shard_size: maximum shard size passed to `shard_checkpoint`.
+        save_safetensors: write safetensors files instead of `torch.save` files.
+
+    Raises:
+        ValueError: if `num_expand` is not positive or does not divide the number of layers.
+    """
+    config: "PretrainedConfig" = AutoConfig.from_pretrained(model_name_or_path)
+    num_layers = getattr(config, "num_hidden_layers")
+
+    # Validate up front, before anything is written to `output_dir` and before the model is loaded.
+    if num_expand <= 0:
+        raise ValueError("`num_expand` should be a positive integer, got {}.".format(num_expand))
+
+    if num_layers % num_expand != 0:
+        raise ValueError("`num_layers` {} should be divisible by `num_expand` {}.".format(num_layers, num_expand))
+
+    setattr(config, "num_hidden_layers", num_layers + num_expand)
+    config.save_pretrained(output_dir)
+
+    tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
+    tokenizer.save_pretrained(output_dir)
+
+    config: "PretrainedConfig" = AutoConfig.from_pretrained(model_name_or_path)  # load the original one
+    if save_safetensors:
+        setattr(config, "tie_word_embeddings", False)  # safetensors does not allow shared weights
+
+    model: "PreTrainedModel" = AutoModelForCausalLM.from_pretrained(
+        model_name_or_path,
+        config=config,
+        torch_dtype="auto",
+        trust_remote_code=True,
+        low_cpu_mem_usage=True,
+    )
+    state_dict = model.state_dict()
+
+    split = num_layers // num_expand  # number of original layers between two inserted layers
+    layer_cnt = 0  # index of the next layer in the expanded model
+    output_state_dict = OrderedDict()
+    for i in range(num_layers):
+        # Copy original layer `i` to its new position.
+        for key, value in state_dict.items():
+            if ".{:d}.".format(i) in key:
+                output_state_dict[change_name(key, i, layer_cnt)] = value
+
+        print("Add layer {} copied from layer {}".format(layer_cnt, i))
+        layer_cnt += 1
+        if (i + 1) % split == 0:
+            # End of a group: insert a new layer derived from layer `i`.
+            for key, value in state_dict.items():
+                if ".{:d}.".format(i) in key:
+                    if "down_proj" in key or "o_proj" in key:
+                        output_state_dict[change_name(key, i, layer_cnt)] = torch.zeros_like(value)
+                    else:
+                        output_state_dict[change_name(key, i, layer_cnt)] = torch.clone(value)
+
+            print("Add layer {} expanded from layer {}".format(layer_cnt, i))
+            layer_cnt += 1
+
+    # Carry over every tensor that was not renamed above (keys without a layer index).
+    for key, value in state_dict.items():
+        if key not in output_state_dict:
+            output_state_dict[key] = value
+
+    weights_name = SAFE_WEIGHTS_NAME if save_safetensors else WEIGHTS_NAME
+    shards, index = shard_checkpoint(output_state_dict, max_shard_size=shard_size, weights_name=weights_name)
+
+    for shard_file, shard in tqdm(shards.items(), desc="Save weights"):
+        if save_safetensors:
+            save_file(shard, os.path.join(output_dir, shard_file), metadata={"format": "pt"})
+        else:
+            torch.save(shard, os.path.join(output_dir, shard_file))
+
+    if index is None:
+        print("Model weights saved in {}".format(os.path.join(output_dir, weights_name)))
+    else:
+        index_name = SAFE_WEIGHTS_INDEX_NAME if save_safetensors else WEIGHTS_INDEX_NAME
+        with open(os.path.join(output_dir, index_name), "w", encoding="utf-8") as f:
+            json.dump(index, f, indent=2, sort_keys=True)
+        print("Model weights saved in {}".format(output_dir))
+
+    print("Fine-tune this model with:")
+    print("  --model_name_or_path {} \\".format(output_dir))
+    print("  --finetuning_type freeze \\")
+    print("  --name_module_trainable all \\")
+    print("  --num_layer_trainable {} \\".format(num_expand))
+    print("  --use_llama_pro")
+
+
+# Script entry point: hand `block_expansion` to python-fire so it can be driven from the command line.
+if __name__ == "__main__":
+    fire.Fire(block_expansion)
--- a/scripts/llamafy_baichuan2.py
+++ b/scripts/llamafy_baichuan2.py
@ -0,0 +1,92 @@
+# coding=utf-8
+# Converts the Baichuan2-7B model into the same format as LLaMA2-7B.
+# Usage: python llamafy_baichuan2.py --input_dir input --output_dir output
+# Inspired by: https://huggingface.co/fireballoon/baichuan-llama-7b/blob/main/convert_baichuan_to_llama.py
+# Converted model: https://huggingface.co/hiyouga/Baichuan2-7B-Base-LLaMAfied
+
+import json
+import os
+from collections import OrderedDict
+from typing import Any, Dict, Optional
+
+import fire
+import torch
+from safetensors.torch import save_file
+from tqdm import tqdm
+from transformers.modeling_utils import (
+    SAFE_WEIGHTS_INDEX_NAME,
+    SAFE_WEIGHTS_NAME,
+    WEIGHTS_INDEX_NAME,
+    WEIGHTS_NAME,
+    shard_checkpoint,
+)
+
+
+CONFIG_NAME = "config.json"
+
+
+def save_weight(input_dir: str, output_dir: str, shard_size: str, save_safetensors: bool):
+    r"""
+    Converts the Baichuan2 weights found in `input_dir` and writes them, sharded, to `output_dir`.
+
+    Args:
+        input_dir: directory whose `*.bin` files are loaded and merged into one state dict.
+        output_dir: directory receiving the converted shards and, when sharded, the index file.
+        shard_size: maximum shard size passed to `shard_checkpoint`.
+        save_safetensors: write safetensors files instead of `torch.save` files.
+    """
+    baichuan2_state_dict: Dict[str, torch.Tensor] = OrderedDict()
+    for filepath in tqdm(os.listdir(input_dir), desc="Load weights"):
+        if os.path.isfile(os.path.join(input_dir, filepath)) and filepath.endswith(".bin"):
+            # NOTE(security): torch.load unpickles the file; only run this on checkpoints you trust.
+            shard_weight = torch.load(os.path.join(input_dir, filepath), map_location="cpu")
+            baichuan2_state_dict.update(shard_weight)
+
+    llama2_state_dict: Dict[str, torch.Tensor] = OrderedDict()
+    for key, value in tqdm(baichuan2_state_dict.items(), desc="Convert format"):
+        if "W_pack" in key:
+            # The packed projection is cut into three equal row blocks: q, k and v.
+            proj_size = value.size(0) // 3
+            llama2_state_dict[key.replace("W_pack", "q_proj")] = value[:proj_size, :]
+            llama2_state_dict[key.replace("W_pack", "k_proj")] = value[proj_size : 2 * proj_size, :]
+            llama2_state_dict[key.replace("W_pack", "v_proj")] = value[2 * proj_size :, :]
+        elif "lm_head" in key:
+            llama2_state_dict[key] = torch.nn.functional.normalize(value)
+        else:
+            llama2_state_dict[key] = value
+
+    weights_name = SAFE_WEIGHTS_NAME if save_safetensors else WEIGHTS_NAME
+    shards, index = shard_checkpoint(llama2_state_dict, max_shard_size=shard_size, weights_name=weights_name)
+
+    for shard_file, shard in tqdm(shards.items(), desc="Save weights"):
+        if save_safetensors:
+            save_file(shard, os.path.join(output_dir, shard_file), metadata={"format": "pt"})
+        else:
+            torch.save(shard, os.path.join(output_dir, shard_file))
+
+    if index is None:
+        # Report the file that was actually written (safetensors or bin), not always WEIGHTS_NAME.
+        print("Model weights saved in {}".format(os.path.join(output_dir, weights_name)))
+    else:
+        index_name = SAFE_WEIGHTS_INDEX_NAME if save_safetensors else WEIGHTS_INDEX_NAME
+        with open(os.path.join(output_dir, index_name), "w", encoding="utf-8") as f:
+            json.dump(index, f, indent=2, sort_keys=True)
+        print("Model weights saved in {}".format(output_dir))
+
+
+def save_config(input_dir: str, output_dir: str):
+    r"""
+    Rewrites the `config.json` from `input_dir` as a LLaMA config in `output_dir`.
+
+    The `auto_map` and `tokenizer_class` entries are removed when present, and the
+    `architectures` and `model_type` entries are set to their LLaMA values.
+    """
+    source_path = os.path.join(input_dir, CONFIG_NAME)
+    with open(source_path, "r", encoding="utf-8") as f:
+        llama2_config_dict: Dict[str, Any] = json.load(f)
+
+    for stale_key in ("auto_map", "tokenizer_class"):
+        llama2_config_dict.pop(stale_key, None)
+
+    llama2_config_dict["architectures"] = ["LlamaForCausalLM"]
+    llama2_config_dict["model_type"] = "llama"
+
+    target_path = os.path.join(output_dir, CONFIG_NAME)
+    with open(target_path, "w", encoding="utf-8") as f:
+        json.dump(llama2_config_dict, f, indent=2)
+    print("Model config saved in {}".format(target_path))
+
+
+def llamafy_baichuan2(
+    input_dir: str, output_dir: str, shard_size: Optional[str] = "2GB", save_safetensors: Optional[bool] = False
+):
+    r"""
+    Converts a Baichuan2 checkpoint in `input_dir` into a LLaMA-format checkpoint in `output_dir`.
+
+    Args:
+        input_dir: directory holding the source weights and `config.json`.
+        output_dir: directory to create; it must not exist yet.
+        shard_size: maximum shard size forwarded to `save_weight`.
+        save_safetensors: forwarded to `save_weight`.
+
+    Raises:
+        FileExistsError: if `output_dir` already exists.
+    """
+    try:
+        os.makedirs(output_dir, exist_ok=False)
+    except FileExistsError as e:
+        # `raise print(...)` raised None (a TypeError); raise a real exception with the same message.
+        raise FileExistsError("Output dir already exists: {}".format(output_dir)) from e
+
+    save_weight(input_dir, output_dir, shard_size, save_safetensors)
+    save_config(input_dir, output_dir)
+
+
+# Script entry point: hand `llamafy_baichuan2` to python-fire so it can be driven from the command line.
+if __name__ == "__main__":
+    fire.Fire(llamafy_baichuan2)
--- a/scripts/llamafy_qwen.py
+++ b/scripts/llamafy_qwen.py
@ -0,0 +1,144 @@
+# coding=utf-8
+# Converts the Qwen models into the same format as LLaMA2.
+# Usage: python llamafy_qwen.py --input_dir input --output_dir output
+# Converted model: https://huggingface.co/hiyouga/Qwen-14B-Chat-LLaMAfied
+
+import json
+import os
+from collections import OrderedDict
+from typing import Any, Dict, Optional
+
+import fire
+import torch
+from safetensors import safe_open
+from safetensors.torch import save_file
+from tqdm import tqdm
+from transformers.modeling_utils import (
+    SAFE_WEIGHTS_INDEX_NAME,
+    SAFE_WEIGHTS_NAME,
+    WEIGHTS_INDEX_NAME,
+    WEIGHTS_NAME,
+    shard_checkpoint,
+)
+from transformers.utils import check_min_version
+
+
+# Runs at import time: any error raised by `check_min_version("4.34.0")` is replaced
+# by a ValueError asking the user to upgrade `transformers`.
+try:
+    check_min_version("4.34.0")
+except Exception:
+    raise ValueError("Please upgrade `transformers` to 4.34.0")
+
+
+CONFIG_NAME = "config.json"
+
+
+def save_weight(input_dir: str, output_dir: str, shard_size: str, save_safetensors: bool) -> str:
+    # Converts the Qwen weights found in `input_dir` to LLaMA key names and writes them,
+    # sharded, to `output_dir`.
+    #
+    # Args:
+    #   input_dir: directory whose `*.safetensors` files are read into one state dict.
+    #   output_dir: directory receiving the converted shards and, when sharded, the index file.
+    #   shard_size: maximum shard size passed to `shard_checkpoint`.
+    #   save_safetensors: write safetensors files instead of `torch.save` files.
+    #
+    # Returns:
+    #   The dtype of the first tensor encountered, as a string without the "torch." prefix
+    #   ("None" when no tensor was found).
+    #
+    # Raises:
+    #   KeyError: if a tensor name matches none of the known Qwen patterns.
+    qwen_state_dict: Dict[str, torch.Tensor] = OrderedDict()
+    for filepath in tqdm(os.listdir(input_dir), desc="Load weights"):
+        if os.path.isfile(os.path.join(input_dir, filepath)) and filepath.endswith(".safetensors"):
+            with safe_open(os.path.join(input_dir, filepath), framework="pt", device="cpu") as f:
+                for key in f.keys():
+                    qwen_state_dict[key] = f.get_tensor(key)
+
+    llama2_state_dict: Dict[str, torch.Tensor] = OrderedDict()
+    torch_dtype = None
+    for key, value in tqdm(qwen_state_dict.items(), desc="Convert format"):
+        # Remember the dtype of the first tensor only; it is reported back to the caller.
+        if torch_dtype is None:
+            torch_dtype = value.dtype
+        if "wte" in key:
+            llama2_state_dict["model.embed_tokens.weight"] = value
+        elif "ln_f" in key:
+            llama2_state_dict["model.norm.weight"] = value
+        else:
+            key = key.replace("transformer.h", "model.layers")
+            # The branches below are substring tests; their order matters.
+            if "attn.c_attn" in key:
+                # The fused attention tensor is cut along dim 0 into three equal parts: q, k and v.
+                proj_size = value.size(0) // 3
+                llama2_state_dict[key.replace("attn.c_attn", "self_attn.q_proj")] = value[:proj_size, ...]
+                llama2_state_dict[key.replace("attn.c_attn", "self_attn.k_proj")] = value[
+                    proj_size : 2 * proj_size, ...
+                ]
+                llama2_state_dict[key.replace("attn.c_attn", "self_attn.v_proj")] = value[2 * proj_size :, ...]
+            elif "attn.c_proj" in key:
+                llama2_state_dict[key.replace("attn.c_proj", "self_attn.o_proj")] = value
+                # Also emit an all-zero `o_proj.bias` shaped like one column of the weight;
+                # the config written by `save_config` sets `attention_bias` to True.
+                llama2_state_dict[key.replace("attn.c_proj.weight", "self_attn.o_proj.bias")] = torch.zeros_like(
+                    value[:, 0]
+                ).squeeze()
+            elif "ln_1" in key:
+                llama2_state_dict[key.replace("ln_1", "input_layernorm")] = value
+            elif "ln_2" in key:
+                llama2_state_dict[key.replace("ln_2", "post_attention_layernorm")] = value
+            elif "mlp.w1" in key:
+                llama2_state_dict[key.replace("mlp.w1", "mlp.up_proj")] = value
+            elif "mlp.w2" in key:
+                llama2_state_dict[key.replace("mlp.w2", "mlp.gate_proj")] = value
+            elif "mlp.c_proj" in key:
+                llama2_state_dict[key.replace("mlp.c_proj", "mlp.down_proj")] = value
+            elif "lm_head" in key:
+                llama2_state_dict[key] = value
+            else:
+                raise KeyError("Unable to process key {}".format(key))
+
+    weights_name = SAFE_WEIGHTS_NAME if save_safetensors else WEIGHTS_NAME
+    shards, index = shard_checkpoint(llama2_state_dict, max_shard_size=shard_size, weights_name=weights_name)
+
+    for shard_file, shard in tqdm(shards.items(), desc="Save weights"):
+        if save_safetensors:
+            save_file(shard, os.path.join(output_dir, shard_file), metadata={"format": "pt"})
+        else:
+            torch.save(shard, os.path.join(output_dir, shard_file))
+
+    # `index` is None when everything fits in a single file; otherwise write the shard index too.
+    if index is None:
+        print("Model weights saved in {}".format(os.path.join(output_dir, weights_name)))
+    else:
+        index_name = SAFE_WEIGHTS_INDEX_NAME if save_safetensors else WEIGHTS_INDEX_NAME
+        with open(os.path.join(output_dir, index_name), "w", encoding="utf-8") as f:
+            json.dump(index, f, indent=2, sort_keys=True)
+        print("Model weights saved in {}".format(output_dir))
+
+    return str(torch_dtype).replace("torch.", "")
+
+
+def save_config(input_dir: str, output_dir: str, torch_dtype: str):
+    r"""
+    Builds a LLaMA `config.json` in `output_dir` from the Qwen config found in `input_dir`.
+
+    Args:
+        input_dir: directory containing the Qwen `config.json`.
+        output_dir: directory receiving the LLaMA `config.json`.
+        torch_dtype: value stored under the `torch_dtype` key.
+    """
+    source_path = os.path.join(input_dir, CONFIG_NAME)
+    with open(source_path, "r", encoding="utf-8") as f:
+        qwen_config_dict: Dict[str, Any] = json.load(f)
+
+    # Entries are listed in the order they are written to the output file.
+    llama2_config_dict: Dict[str, Any] = OrderedDict(
+        [
+            ("architectures", ["LlamaForCausalLM"]),
+            ("hidden_act", "silu"),
+            ("hidden_size", qwen_config_dict["hidden_size"]),
+            ("initializer_range", qwen_config_dict["initializer_range"]),
+            ("intermediate_size", qwen_config_dict["intermediate_size"] // 2),
+            ("max_position_embeddings", qwen_config_dict["max_position_embeddings"]),
+            ("model_type", "llama"),
+            ("num_attention_heads", qwen_config_dict["num_attention_heads"]),
+            ("num_hidden_layers", qwen_config_dict["num_hidden_layers"]),
+            ("num_key_value_heads", qwen_config_dict["hidden_size"] // qwen_config_dict["kv_channels"]),
+            ("pretraining_tp", 1),
+            ("rms_norm_eps", qwen_config_dict["layer_norm_epsilon"]),
+            ("rope_scaling", None),
+            ("tie_word_embeddings", qwen_config_dict["tie_word_embeddings"]),
+            ("torch_dtype", torch_dtype),
+            ("transformers_version", "4.34.0"),
+            ("use_cache", True),
+            ("vocab_size", qwen_config_dict["vocab_size"]),
+            ("attention_bias", True),
+        ]
+    )
+
+    target_path = os.path.join(output_dir, CONFIG_NAME)
+    with open(target_path, "w", encoding="utf-8") as f:
+        json.dump(llama2_config_dict, f, indent=2)
+    print("Model config saved in {}".format(target_path))
+
+
+def llamafy_qwen(
+    input_dir: str, output_dir: str, shard_size: Optional[str] = "2GB", save_safetensors: Optional[bool] = False
+):
+    r"""
+    Converts a Qwen checkpoint in `input_dir` into a LLaMA-format checkpoint in `output_dir`.
+
+    Args:
+        input_dir: directory holding the source weights and `config.json`.
+        output_dir: directory to create; it must not exist yet.
+        shard_size: maximum shard size forwarded to `save_weight`.
+        save_safetensors: forwarded to `save_weight`.
+
+    Raises:
+        FileExistsError: if `output_dir` already exists.
+    """
+    try:
+        os.makedirs(output_dir, exist_ok=False)
+    except FileExistsError as e:
+        # `raise print(...)` raised None (a TypeError); raise a real exception with the same message.
+        raise FileExistsError("Output dir already exists: {}".format(output_dir)) from e
+
+    # The dtype string reported by `save_weight` is recorded in the generated config.
+    torch_dtype = save_weight(input_dir, output_dir, shard_size, save_safetensors)
+    save_config(input_dir, output_dir, torch_dtype)
+
+
+# Script entry point: hand `llamafy_qwen` to python-fire so it can be driven from the command line.
+if __name__ == "__main__":
+    fire.Fire(llamafy_qwen)
--- a/scripts/loftq_init.py
+++ b/scripts/loftq_init.py
@ -0,0 +1,82 @@
+# coding=utf-8
+# Initializes LoRA weights with LoRA-fine-tuning-aware Quantization (LoftQ)
+# Usage: python loftq_init.py --model_name_or_path path_to_model --save_dir output_dir
+# Inspired by: https://github.com/huggingface/peft/blob/main/examples/loftq_finetuning/quantize_save_load.py
+
+import os
+from typing import TYPE_CHECKING, Optional
+
+import fire
+import torch
+import torch.nn as nn
+from peft import LoftQConfig, LoraConfig, TaskType, get_peft_model
+from transformers import AutoModelForCausalLM, AutoTokenizer
+
+
+if TYPE_CHECKING:
+    from transformers import PreTrainedModel
+
+
+class Shell(nn.Module):
+    r"""
+    Minimal module that only holds a frozen `weight` and, when given, a frozen `bias`.
+    """
+
+    def __init__(self, weight: torch.Tensor, bias: Optional[torch.Tensor] = None):
+        super().__init__()
+        self.weight = nn.Parameter(weight, requires_grad=False)
+        if bias is None:
+            return  # no `bias` attribute is created at all
+
+        self.bias = nn.Parameter(bias, requires_grad=False)
+
+
+def unwrap_model(model: nn.Module, pattern=".base_layer") -> None:
+    r"""
+    Replaces, in place, every submodule that owns a `base_layer` child with a `Shell`
+    holding that base layer's `weight` and `bias` (each `None` when absent).
+
+    Args:
+        model: module to modify.
+        pattern: substring identifying wrapped modules in `named_modules()`; the text
+            before its first occurrence is taken as the wrapped module's name.
+    """
+    wrapped_names = {k.split(pattern)[0] for k, _ in model.named_modules() if pattern in k}
+    for name in wrapped_names:
+        parent_name, _, child_name = name.rpartition(".")
+        parent_module = model.get_submodule(parent_name)
+        wrapper = getattr(parent_module, child_name)
+        base_layer = getattr(wrapper, "base_layer")
+        replacement = Shell(getattr(base_layer, "weight", None), getattr(base_layer, "bias", None))
+        setattr(parent_module, child_name, replacement)
+
+    print("Model unwrapped.")
+
+
+def quantize_loftq(
+    model_name_or_path: str,
+    save_dir: str,
+    loftq_bits: Optional[int] = 4,
+    loftq_iter: Optional[int] = 1,
+    lora_alpha: Optional[int] = None,
+    lora_rank: Optional[int] = 16,
+    lora_target: Optional[str] = "q_proj,v_proj",
+    save_safetensors: Optional[bool] = False,
+):
+    r"""
+    Initializes LoRA adapters with LoftQ and saves the adapters, base model and tokenizer.
+
+    Args:
+        model_name_or_path: model and tokenizer to load.
+        save_dir: receives the base model and tokenizer; adapters go to `<save_dir>/adapters`.
+        loftq_bits: `loftq_bits` of the `LoftQConfig`.
+        loftq_iter: `loftq_iter` of the `LoftQConfig`.
+        lora_alpha: LoRA alpha; defaults to `lora_rank * 2` when None.
+        lora_rank: LoRA rank `r`.
+        lora_target: comma-separated names of the target modules.
+        save_safetensors: passed as `safe_serialization` to both `save_pretrained` calls.
+    """
+    tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, trust_remote_code=True)
+    model = AutoModelForCausalLM.from_pretrained(model_name_or_path, trust_remote_code=True, torch_dtype="auto")
+    loftq_config = LoftQConfig(loftq_bits=loftq_bits, loftq_iter=loftq_iter)
+
+    if lora_alpha is None:
+        effective_alpha = lora_rank * 2
+    else:
+        effective_alpha = lora_alpha
+
+    target_modules = [name.strip() for name in lora_target.split(",")]
+    lora_config = LoraConfig(
+        task_type=TaskType.CAUSAL_LM,
+        inference_mode=True,
+        r=lora_rank,
+        lora_alpha=effective_alpha,
+        lora_dropout=0.1,
+        target_modules=target_modules,
+        init_lora_weights="loftq",
+        loftq_config=loftq_config,
+    )
+
+    # Init LoftQ model
+    lora_model = get_peft_model(model, lora_config)
+    base_model: "PreTrainedModel" = lora_model.get_base_model()
+
+    # Save LoftQ model: the stored adapter config points at `save_dir` as its base model
+    default_config = lora_model.base_model.peft_config["default"]
+    default_config.base_model_name_or_path = save_dir
+    default_config.init_lora_weights = True
+    adapter_dir = os.path.join(save_dir, "adapters")
+    lora_model.save_pretrained(adapter_dir, safe_serialization=save_safetensors)
+
+    # Save base model
+    unwrap_model(base_model)
+    base_model.save_pretrained(save_dir, safe_serialization=save_safetensors)
+    tokenizer.save_pretrained(save_dir)
+
+
+# Script entry point: hand `quantize_loftq` to python-fire so it can be driven from the command line.
+if __name__ == "__main__":
+    fire.Fire(quantize_loftq)
--- a/setup.py
+++ b/setup.py
@ -0,0 +1,73 @@
+import os
+import re
+
+from setuptools import find_packages, setup
+
+
+def get_version():
+    r"""
+    Returns the `__version__` string declared in `src/llmtuner/__init__.py`.
+
+    Raises a ValueError (from tuple unpacking) unless the pattern matches exactly once.
+    """
+    init_path = os.path.join("src", "llmtuner", "__init__.py")
+    with open(init_path, "r", encoding="utf-8") as f:
+        source = f.read()
+        pattern = r"{0}\W*=\W*\"([^\"]+)\"".format("__version__")
+        matches = re.findall(pattern, source)
+        (version,) = matches
+        return version
+
+
+def get_requires():
+    r"""
+    Returns the requirement specifiers listed in `requirements.txt`.
+
+    Each line is stripped first; blank lines and comment lines (including indented ones)
+    are dropped so that no empty or `#`-prefixed entry reaches `install_requires`.
+    """
+    with open("requirements.txt", "r", encoding="utf-8") as f:
+        stripped_lines = [line.strip() for line in f.read().splitlines()]
+
+    return [line for line in stripped_lines if line and not line.startswith("#")]
+
+
+# Optional dependency groups, keyed by extra name; passed to `setup()` as `extras_require`.
+extra_require = {
+    "deepspeed": ["deepspeed>=0.10.0"],
+    "metrics": ["nltk", "jieba", "rouge-chinese"],
+    "galore": ["galore-torch"],
+    "badam": ["badam"],
+    "vllm": ["vllm>=0.4.0"],
+    "bitsandbytes": ["bitsandbytes>=0.39.0"],
+    "gptq": ["optimum>=1.16.0", "auto-gptq>=0.5.0"],
+    "awq": ["autoawq"],
+    "aqlm": ["aqlm[gpu]>=1.1.0"],
+    "qwen": ["tiktoken", "transformers_stream_generator"],
+    "modelscope": ["modelscope"],
+    "quality": ["ruff"],
+}
+
+
+def main():
+    r"""
+    Runs `setuptools.setup` for the `llmtuner` package.
+
+    The version comes from `get_version()`, the install requirements from `get_requires()`
+    and the long description from `README.md` in the current working directory.
+    """
+    # Read the README inside a context manager so the file handle is closed deterministically.
+    with open("README.md", "r", encoding="utf-8") as f:
+        long_description = f.read()
+
+    setup(
+        name="llmtuner",
+        version=get_version(),
+        author="hiyouga",
+        author_email="hiyouga" "@" "buaa.edu.cn",
+        description="Easy-to-use LLM fine-tuning framework",
+        long_description=long_description,
+        long_description_content_type="text/markdown",
+        keywords=["LLaMA", "BLOOM", "Falcon", "LLM", "ChatGPT", "transformer", "pytorch", "deep learning"],
+        license="Apache 2.0 License",
+        url="https://github.com/hiyouga/LLaMA-Factory",
+        package_dir={"": "src"},
+        packages=find_packages("src"),
+        python_requires=">=3.8.0",
+        install_requires=get_requires(),
+        extras_require=extra_require,
+        classifiers=[
+            "Development Status :: 4 - Beta",
+            "Intended Audience :: Developers",
+            "Intended Audience :: Education",
+            "Intended Audience :: Science/Research",
+            "License :: OSI Approved :: Apache Software License",
+            "Operating System :: OS Independent",
+            "Programming Language :: Python :: 3",
+            "Programming Language :: Python :: 3.8",
+            "Programming Language :: Python :: 3.9",
+            "Programming Language :: Python :: 3.10",
+            "Programming Language :: Python :: 3.11",
+            "Topic :: Scientific/Engineering :: Artificial Intelligence",
+        ],
+    )
+
+
+# Run the packaging entry point when this file is executed as a script.
+if __name__ == "__main__":
+    main()
--- a/src/api_demo.py
+++ b/src/api_demo.py
@ -0,0 +1,16 @@
+import os
+
+import uvicorn
+
+from llmtuner import ChatModel, create_app
+
+
+def main():
+    r"""
+    Builds the API app around a `ChatModel` and serves it with uvicorn on all interfaces.
+
+    The port is read from the `API_PORT` environment variable and defaults to 8000.
+    """
+    app = create_app(ChatModel())
+    port = os.environ.get("API_PORT", 8000)
+    print("Visit http://localhost:{}/docs for API document.".format(port))
+    uvicorn.run(app, host="0.0.0.0", port=int(port), workers=1)
+
+
+# Start the API server when this file is executed as a script.
+if __name__ == "__main__":
+    main()
--- a/src/cli_demo.py
+++ b/src/cli_demo.py
@ -0,0 +1,49 @@
+from llmtuner import ChatModel
+from llmtuner.extras.misc import torch_gc
+
+
+# Import `readline` on every platform except Windows; it is imported only for its side
+# effects (hence the F401 suppression). If the import fails, print a hint and carry on.
+try:
+    import platform
+
+    if platform.system() != "Windows":
+        import readline  # noqa: F401
+except ImportError:
+    print("Install `readline` for a better experience.")
+
+
+def main():
+    r"""
+    Runs an interactive chat loop on the terminal.
+
+    `exit` leaves the loop, `clear` resets the history and calls `torch_gc()`; any other
+    input is appended to the history and answered by streaming from `chat_model.stream_chat`.
+    """
+    chat_model = ChatModel()
+    messages = []
+    print("Welcome to the CLI application, use `clear` to remove the history, use `exit` to exit the application.")
+
+    while True:
+        try:
+            query = input("\nUser: ")
+        except UnicodeDecodeError:
+            print("Detected decoding error at the inputs, please set the terminal encoding to utf-8.")
+            continue
+
+        command = query.strip()
+        if command == "exit":
+            break
+
+        if command == "clear":
+            messages = []
+            torch_gc()
+            print("History has been removed.")
+            continue
+
+        user_turn = {"role": "user", "content": query}
+        messages.append(user_turn)
+        print("Assistant: ", end="", flush=True)
+
+        # Echo each streamed piece immediately while accumulating the full reply for the history.
+        response = ""
+        for new_text in chat_model.stream_chat(messages):
+            print(new_text, end="", flush=True)
+            response += new_text
+        print()
+        assistant_turn = {"role": "assistant", "content": response}
+        messages.append(assistant_turn)
+
+
+# Start the interactive chat loop when this file is executed as a script.
+if __name__ == "__main__":
+    main()
--- a/src/evaluate.py
+++ b/src/evaluate.py
@ -0,0 +1,9 @@
+from llmtuner import Evaluator
+
+
+def main():
+    r"""
+    Creates an `Evaluator` and runs its `eval` method.
+    """
+    evaluator = Evaluator()
+    evaluator.eval()
+
+
+# Run the evaluation when this file is executed as a script.
+if __name__ == "__main__":
+    main()
--- a/src/export_model.py
+++ b/src/export_model.py
@ -0,0 +1,9 @@
+from llmtuner import export_model
+
+
+def main():
+    r"""
+    Thin wrapper that calls `llmtuner.export_model` with no arguments.
+    """
+    return export_model() and None
+
+
+# Run the export when this file is executed as a script.
+if __name__ == "__main__":
+    main()
--- a/src/llmtuner/__init__.py
+++ b/src/llmtuner/__init__.py
@ -0,0 +1,11 @@
+# Level: api, webui > chat, eval, train > data, model > extras, hparams
+
+from .api import create_app
+from .chat import ChatModel
+from .eval import Evaluator
+from .train import export_model, run_exp
+from .webui import create_ui, create_web_demo
+
+
+# Package version (setup.py extracts this string with a regex) and the public names of the package.
+__version__ = "0.7.0"
+__all__ = ["create_app", "ChatModel", "Evaluator", "export_model", "run_exp", "create_ui", "create_web_demo"]
--- a/src/llmtuner/api/__init__.py
+++ b/src/llmtuner/api/__init__.py
@ -0,0 +1,4 @@
+from .app import create_app
+
+
+# Public names of the `api` subpackage.
+__all__ = ["create_app"]
--- a/src/llmtuner/api/app.py
+++ b/src/llmtuner/api/app.py
@ -0,0 +1,230 @@
+import json
+import os
+from contextlib import asynccontextmanager
+from typing import Any, Dict, Sequence
+
+from pydantic import BaseModel
+
+from ..chat import ChatModel
+from ..data import Role as DataRole
+from ..extras.misc import torch_gc
+from ..extras.packages import is_fastapi_availble, is_starlette_available, is_uvicorn_available
+from .protocol import (
+    ChatCompletionMessage,
+    ChatCompletionRequest,
+    ChatCompletionResponse,
+    ChatCompletionResponseChoice,
+    ChatCompletionResponseStreamChoice,
+    ChatCompletionResponseUsage,
+    ChatCompletionStreamResponse,
+    Finish,
+    Function,
+    FunctionCall,
+    ModelCard,
+    ModelList,
+    Role,
+    ScoreEvaluationRequest,
+    ScoreEvaluationResponse,
+)
+
+
+if is_fastapi_availble():
+    from fastapi import FastAPI, HTTPException, status
+    from fastapi.middleware.cors import CORSMiddleware
+
+
+if is_starlette_available():
+    from sse_starlette import EventSourceResponse
+
+
+if is_uvicorn_available():
+    import uvicorn
+
+
+# Lifespan handler passed to FastAPI in `create_app`: does nothing before the `yield`,
+# and calls `torch_gc()` once control returns after it.
+@asynccontextmanager
+async def lifespan(app: "FastAPI"):  # collects GPU memory
+    yield
+    torch_gc()
+
+
+def dictify(data: "BaseModel") -> Dict[str, Any]:
+    r"""
+    Dumps a pydantic model to a dict, leaving out fields that were never set.
+
+    Uses `model_dump` when the object has it (pydantic v2) and `dict` otherwise (pydantic v1).
+    Only the attribute lookup is guarded, so an AttributeError raised while dumping is not
+    mistaken for a missing v2 API and silently retried with the legacy method.
+    """
+    try:  # pydantic v2
+        dump = data.model_dump
+    except AttributeError:  # pydantic v1
+        dump = data.dict
+
+    return dump(exclude_unset=True)
+
+
+def jsonify(data: "BaseModel") -> str:
+    r"""
+    Serializes a pydantic model to a JSON string, leaving out unset fields and keeping
+    non-ASCII characters unescaped.
+
+    Uses `model_dump` + `json.dumps` when the object has `model_dump` (pydantic v2) and the
+    model's own `json` method otherwise (pydantic v1). Only the attribute lookup is guarded,
+    so an AttributeError raised while dumping is not silently retried with the legacy method.
+    """
+    try:  # pydantic v2
+        model_dump = data.model_dump
+    except AttributeError:  # pydantic v1
+        return data.json(exclude_unset=True, ensure_ascii=False)
+
+    return json.dumps(model_dump(exclude_unset=True), ensure_ascii=False)
+
+
+def create_app(chat_model: "ChatModel") -> "FastAPI":
+    # Builds the FastAPI application that serves `chat_model`.
+    #
+    # Routes registered below:
+    #   GET  /v1/models            - returns a single fixed model card.
+    #   POST /v1/chat/completions  - chat completion, optionally streamed as server-sent events.
+    #   POST /v1/score/evaluation  - scores messages; only allowed when the engine cannot generate.
+    app = FastAPI(lifespan=lifespan)
+
+    # CORS is fully open: any origin, method and header, with credentials allowed.
+    app.add_middleware(
+        CORSMiddleware,
+        allow_origins=["*"],
+        allow_credentials=True,
+        allow_methods=["*"],
+        allow_headers=["*"],
+    )
+
+    # Maps API-level roles to the role values of the data layer; note TOOL maps to OBSERVATION.
+    role_mapping = {
+        Role.USER: DataRole.USER.value,
+        Role.ASSISTANT: DataRole.ASSISTANT.value,
+        Role.SYSTEM: DataRole.SYSTEM.value,
+        Role.FUNCTION: DataRole.FUNCTION.value,
+        Role.TOOL: DataRole.OBSERVATION.value,
+    }
+
+    @app.get("/v1/models", response_model=ModelList)
+    async def list_models():
+        # The advertised model id is hard-coded and independent of the loaded model.
+        model_card = ModelCard(id="gpt-3.5-turbo")
+        return ModelList(data=[model_card])
+
+    @app.post("/v1/chat/completions", response_model=ChatCompletionResponse, status_code=status.HTTP_200_OK)
+    async def create_chat_completion(request: ChatCompletionRequest):
+        if not chat_model.engine.can_generate:
+            raise HTTPException(status_code=status.HTTP_405_METHOD_NOT_ALLOWED, detail="Not allowed")
+
+        if len(request.messages) == 0:
+            raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="Invalid length")
+
+        # A leading system message is removed from the list and passed on separately.
+        if request.messages[0].role == Role.SYSTEM:
+            system = request.messages.pop(0).content
+        else:
+            system = ""
+
+        # The remaining messages must be odd in number so that the last one is a user/tool turn.
+        if len(request.messages) % 2 == 0:
+            raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="Only supports u/a/u/a/u...")
+
+        input_messages = []
+        for i, message in enumerate(request.messages):
+            # Even positions must be user/tool messages, odd positions assistant/function messages.
+            if i % 2 == 0 and message.role not in [Role.USER, Role.TOOL]:
+                raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="Invalid role")
+            elif i % 2 == 1 and message.role not in [Role.ASSISTANT, Role.FUNCTION]:
+                raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="Invalid role")
+
+            if message.role == Role.ASSISTANT and isinstance(message.tool_calls, list) and len(message.tool_calls):
+                # Only the first tool call is used; it is re-encoded as a function-role message.
+                name = message.tool_calls[0].function.name
+                arguments = message.tool_calls[0].function.arguments
+                # NOTE(review): the key is "argument" (singular) - confirm the consumer expects this spelling.
+                content = json.dumps({"name": name, "argument": arguments}, ensure_ascii=False)
+                input_messages.append({"role": role_mapping[Role.FUNCTION], "content": content})
+            else:
+                input_messages.append({"role": role_mapping[message.role], "content": message.content})
+
+        # Tool definitions are serialized to one JSON string; an empty string means "no tools".
+        tool_list = request.tools
+        if isinstance(tool_list, list) and len(tool_list):
+            try:
+                tools = json.dumps([dictify(tool.function) for tool in tool_list], ensure_ascii=False)
+            except Exception:
+                raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="Invalid tools")
+        else:
+            tools = ""
+
+        if request.stream:
+            if tools:
+                raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="Cannot stream function calls.")
+
+            generate = stream_chat_completion(input_messages, system, tools, request)
+            return EventSourceResponse(generate, media_type="text/event-stream")
+
+        responses = await chat_model.achat(
+            input_messages,
+            system,
+            tools,
+            do_sample=request.do_sample,
+            temperature=request.temperature,
+            top_p=request.top_p,
+            max_new_tokens=request.max_tokens,
+            num_return_sequences=request.n,
+        )
+
+        prompt_length, response_length = 0, 0
+        choices = []
+        for i, response in enumerate(responses):
+            # With tools enabled the generated text is run through the template's tool extractor.
+            if tools:
+                result = chat_model.engine.template.format_tools.extract(response.response_text)
+            else:
+                result = response.response_text
+
+            # A tuple result is treated as a (name, arguments) tool call; anything else as plain text.
+            if isinstance(result, tuple):
+                name, arguments = result
+                function = Function(name=name, arguments=arguments)
+                response_message = ChatCompletionMessage(
+                    role=Role.ASSISTANT, tool_calls=[FunctionCall(function=function)]
+                )
+                finish_reason = Finish.TOOL
+            else:
+                response_message = ChatCompletionMessage(role=Role.ASSISTANT, content=result)
+                finish_reason = Finish.STOP if response.finish_reason == "stop" else Finish.LENGTH
+
+            choices.append(
+                ChatCompletionResponseChoice(index=i, message=response_message, finish_reason=finish_reason)
+            )
+            # The prompt length is overwritten (not summed) per response; response lengths are summed.
+            prompt_length = response.prompt_length
+            response_length += response.response_length
+
+        usage = ChatCompletionResponseUsage(
+            prompt_tokens=prompt_length,
+            completion_tokens=response_length,
+            total_tokens=prompt_length + response_length,
+        )
+
+        return ChatCompletionResponse(model=request.model, choices=choices, usage=usage)
+
+    async def stream_chat_completion(
+        messages: Sequence[Dict[str, str]], system: str, tools: str, request: ChatCompletionRequest
+    ):
+        # Async generator of JSON chunks: an opening chunk carrying the assistant role and empty
+        # content, one chunk per non-empty generated piece, a closing chunk with finish_reason
+        # STOP, and finally the literal "[DONE]".
+        choice_data = ChatCompletionResponseStreamChoice(
+            index=0, delta=ChatCompletionMessage(role=Role.ASSISTANT, content=""), finish_reason=None
+        )
+        chunk = ChatCompletionStreamResponse(model=request.model, choices=[choice_data])
+        yield jsonify(chunk)
+
+        async for new_token in chat_model.astream_chat(
+            messages,
+            system,
+            tools,
+            do_sample=request.do_sample,
+            temperature=request.temperature,
+            top_p=request.top_p,
+            max_new_tokens=request.max_tokens,
+        ):
+            if len(new_token) == 0:
+                continue
+
+            choice_data = ChatCompletionResponseStreamChoice(
+                index=0, delta=ChatCompletionMessage(content=new_token), finish_reason=None
+            )
+            chunk = ChatCompletionStreamResponse(model=request.model, choices=[choice_data])
+            yield jsonify(chunk)
+
+        choice_data = ChatCompletionResponseStreamChoice(
+            index=0, delta=ChatCompletionMessage(), finish_reason=Finish.STOP
+        )
+        chunk = ChatCompletionStreamResponse(model=request.model, choices=[choice_data])
+        yield jsonify(chunk)
+        yield "[DONE]"
+
+    @app.post("/v1/score/evaluation", response_model=ScoreEvaluationResponse, status_code=status.HTTP_200_OK)
+    async def create_score_evaluation(request: ScoreEvaluationRequest):
+        # Inverse of the chat endpoint's guard: scoring is refused when the engine can generate.
+        if chat_model.engine.can_generate:
+            raise HTTPException(status_code=status.HTTP_405_METHOD_NOT_ALLOWED, detail="Not allowed")
+
+        if len(request.messages) == 0:
+            raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="Invalid request")
+
+        scores = await chat_model.aget_scores(request.messages, max_length=request.max_length)
+        return ScoreEvaluationResponse(model=request.model, scores=scores)
+
+    return app
+
+
+# When run as a script: build the app around a fresh ChatModel and serve it with uvicorn
+# on all interfaces; the port comes from the API_PORT environment variable (default 8000).
+if __name__ == "__main__":
+    chat_model = ChatModel()
+    app = create_app(chat_model)
+    uvicorn.run(app, host="0.0.0.0", port=int(os.environ.get("API_PORT", 8000)), workers=1)
--- a/src/llmtuner/api/protocol.py
+++ b/src/llmtuner/api/protocol.py
@ -0,0 +1,128 @@
+import time
+from enum import Enum, unique
+from typing import Any, Dict, List, Optional
+
+from pydantic import BaseModel, Field
+from typing_extensions import Literal
+
+
+# String-valued enumeration of the roles a chat message may carry.
+# `@unique` makes the class definition fail if two members share a value.
+@unique
+class Role(str, Enum):
+    USER = "user"
+    ASSISTANT = "assistant"
+    SYSTEM = "system"
+    FUNCTION = "function"
+    TOOL = "tool"
+
+
+# String-valued enumeration used for the `finish_reason` field of completion choices.
+# `@unique` makes the class definition fail if two members share a value.
+@unique
+class Finish(str, Enum):
+    STOP = "stop"
+    LENGTH = "length"
+    TOOL = "tool_calls"
+
+
+# One model entry, as held in `ModelList.data`.
+# `object` and `owned_by` are fixed literals; `created` defaults to the current
+# Unix time (whole seconds) evaluated when the instance is constructed.
+class ModelCard(BaseModel):
+    id: str
+    object: Literal["model"] = "model"
+    created: int = Field(default_factory=lambda: int(time.time()))
+    owned_by: Literal["owner"] = "owner"
+
+
+# Container for ModelCard entries; `object` is fixed to "list" and `data`
+# defaults to an empty list.
+class ModelList(BaseModel):
+    object: Literal["list"] = "list"
+    data: List[ModelCard] = []
+
+
+# Name and arguments of a function invocation, embedded in FunctionCall.
+# NOTE(review): `arguments` is a plain string — presumably JSON-encoded; confirm against the producer.
+class Function(BaseModel):
+    name: str
+    arguments: str
+
+
+# Declaration of a callable function: all three fields are required.
+# NOTE(review): `parameters` is an arbitrary dict — presumably a JSON-Schema object; confirm with callers.
+class FunctionDefinition(BaseModel):
+    name: str
+    description: str
+    parameters: Dict[str, Any]
+
+
+# One entry of `ChatCompletionRequest.tools`.
+# `type` is either "function" (the default) or "code_interpreter"; `function`
+# is optional and defaults to None.
+class FunctionAvailable(BaseModel):
+    type: Literal["function", "code_interpreter"] = "function"
+    function: Optional[FunctionDefinition] = None
+
+
+# A tool call attached to a message via `tool_calls`.
+# `id` and `type` are fixed literals, so only `function` varies between instances.
+class FunctionCall(BaseModel):
+    id: Literal["call_default"] = "call_default"
+    type: Literal["function"] = "function"
+    function: Function
+
+
+# A message inside `ChatCompletionRequest.messages`.
+# `role` is required; `content` and `tool_calls` are optional and default to None.
+class ChatMessage(BaseModel):
+    role: Role
+    content: Optional[str] = None
+    tool_calls: Optional[List[FunctionCall]] = None
+
+
+# Message payload used on the response side: as `message` in
+# ChatCompletionResponseChoice and as `delta` in ChatCompletionResponseStreamChoice.
+# Unlike ChatMessage, every field (including `role`) is optional, so a delta
+# may carry only `content` or nothing at all.
+class ChatCompletionMessage(BaseModel):
+    role: Optional[Role] = None
+    content: Optional[str] = None
+    tool_calls: Optional[List[FunctionCall]] = None
+
+
+# Request body for a chat completion.
+# `model` and `messages` are required; everything else has a default.
+# The streaming code in the API module forwards `do_sample`, `temperature`,
+# `top_p` and `max_tokens` (as `max_new_tokens`) to the chat model.
+class ChatCompletionRequest(BaseModel):
+    model: str
+    messages: List[ChatMessage]
+    tools: Optional[List[FunctionAvailable]] = None
+    do_sample: bool = True
+    temperature: Optional[float] = None
+    top_p: Optional[float] = None
+    # NOTE(review): presumably the number of choices to generate — confirm against the handler.
+    n: int = 1
+    max_tokens: Optional[int] = None
+    # NOTE(review): presumably selects the chunked streaming response — confirm against the handler.
+    stream: bool = False
+
+
+# One choice of a non-streaming ChatCompletionResponse; all fields are required,
+# including `finish_reason`.
+class ChatCompletionResponseChoice(BaseModel):
+    index: int
+    message: ChatCompletionMessage
+    finish_reason: Finish
+
+
+# One choice of a streamed chunk (ChatCompletionStreamResponse).
+# `finish_reason` is optional: the streaming code in the API module leaves it
+# None for token chunks and sets Finish.STOP on the closing chunk.
+class ChatCompletionResponseStreamChoice(BaseModel):
+    index: int
+    delta: ChatCompletionMessage
+    finish_reason: Optional[Finish] = None
+
+
+# Token accounting attached to ChatCompletionResponse as `usage`; all three
+# integer counts are required.
+# NOTE(review): `total_tokens` is not derived here — the caller must supply it.
+class ChatCompletionResponseUsage(BaseModel):
+    prompt_tokens: int
+    completion_tokens: int
+    total_tokens: int
+
+
+# Full (non-streaming) chat completion response.
+# `id` and `object` are fixed literals; `created` defaults to the current Unix
+# time (whole seconds) evaluated when the instance is constructed.
+class ChatCompletionResponse(BaseModel):
+    id: Literal["chatcmpl-default"] = "chatcmpl-default"
+    object: Literal["chat.completion"] = "chat.completion"
+    created: int = Field(default_factory=lambda: int(time.time()))
+    model: str
+    choices: List[ChatCompletionResponseChoice]
+    usage: ChatCompletionResponseUsage
+
+
+# One chunk of a streamed chat completion.
+# Same shape as ChatCompletionResponse minus `usage`, with `object` fixed to
+# "chat.completion.chunk" and stream-specific choices.
+class ChatCompletionStreamResponse(BaseModel):
+    id: Literal["chatcmpl-default"] = "chatcmpl-default"
+    object: Literal["chat.completion.chunk"] = "chat.completion.chunk"
+    created: int = Field(default_factory=lambda: int(time.time()))
+    model: str
+    choices: List[ChatCompletionResponseStreamChoice]
+
+
+# Request body of the /v1/score/evaluation endpoint.
+# `messages` is a list of plain strings (not ChatMessage objects); it and the
+# optional `max_length` are passed to `chat_model.aget_scores` by the handler.
+class ScoreEvaluationRequest(BaseModel):
+    model: str
+    messages: List[str]
+    max_length: Optional[int] = None
+
+
+# Response body of the /v1/score/evaluation endpoint.
+# `id` and `object` are fixed literals; `model` echoes the request's model name
+# and `scores` holds the floats returned by the scoring call.
+class ScoreEvaluationResponse(BaseModel):
+    id: Literal["scoreeval-default"] = "scoreeval-default"
+    object: Literal["score.evaluation"] = "score.evaluation"
+    model: str
+    scores: List[float]
--- a/src/llmtuner/chat/__init__.py
+++ b/src/llmtuner/chat/__init__.py
@ -0,0 +1,5 @@
+from .base_engine import BaseEngine
+from .chat_model import ChatModel
+
+
+# Names re-exported by this package: the two classes imported above.
+__all__ = ["BaseEngine", "ChatModel"]
--- a/Show More
+++ b/Show More