# LLaMA-Factory-Mirror/evaluation/cmmlu/cmmlu.py

# Copyright 2020 The HuggingFace Datasets Authors and the current dataset script contributor.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os

import datasets
import pandas as pd


# BibTeX entry for the CMMLU paper; handed to ``datasets.DatasetInfo`` as ``citation``.
_CITATION = (
    "@article{li2023cmmlu,\n"
    "  title={CMMLU: Measuring massive multitask language understanding in Chinese},\n"
    "  author={Haonan Li and Yixuan Zhang and Fajri Koto and Yifei Yang and Hai Zhao"
    " and Yeyun Gong and Nan Duan and Timothy Baldwin},\n"
    "  journal={arXiv preprint arXiv:2306.09212},\n"
    "  year={2023}\n"
    "}\n"
)

# One-paragraph summary; handed to ``datasets.DatasetInfo`` as ``description``.
_DESCRIPTION = (
    "CMMLU is a comprehensive Chinese assessment suite specifically designed "
    "to evaluate the advanced knowledge and reasoning abilities of LLMs "
    "within the Chinese language and cultural context.\n"
)

# Upstream project page.
_HOMEPAGE = "https://github.com/haonan-li/CMMLU"

# License string reported in the dataset info.
_LICENSE = (
    "Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License"
)

# Archive given to ``dl_manager.download_and_extract``. It is a relative path,
# presumably resolved against this script's location -- TODO confirm.
_URL = "cmmlu.zip"

# Every CMMLU subject, in alphabetical order. Each name becomes one builder
# config and is also the stem of the per-subject CSV file inside the archive.
task_list = [
    "agronomy",
    "anatomy",
    "ancient_chinese",
    "arts",
    "astronomy",
    "business_ethics",
    "chinese_civil_service_exam",
    "chinese_driving_rule",
    "chinese_food_culture",
    "chinese_foreign_policy",
    "chinese_history",
    "chinese_literature",
    "chinese_teacher_qualification",
    "clinical_knowledge",
    "college_actuarial_science",
    "college_education",
    "college_engineering_hydrology",
    "college_law",
    "college_mathematics",
    "college_medical_statistics",
    "college_medicine",
    "computer_science",
    "computer_security",
    "conceptual_physics",
    "construction_project_management",
    "economics",
    "education",
    "electrical_engineering",
    "elementary_chinese",
    "elementary_commonsense",
    "elementary_information_and_technology",
    "elementary_mathematics",
    "ethnology",
    "food_science",
    "genetics",
    "global_facts",
    "high_school_biology",
    "high_school_chemistry",
    "high_school_geography",
    "high_school_mathematics",
    "high_school_physics",
    "high_school_politics",
    "human_sexuality",
    "international_law",
    "journalism",
    "jurisprudence",
    "legal_and_moral_basis",
    "logical",
    "machine_learning",
    "management",
    "marketing",
    "marxist_theory",
    "modern_chinese",
    "nutrition",
    "philosophy",
    "professional_accounting",
    "professional_law",
    "professional_medicine",
    "professional_psychology",
    "public_relations",
    "security_study",
    "sociology",
    "sports_science",
    "traditional_chinese_medicine",
    "virology",
    "world_history",
    "world_religions",
]


class CMMLUConfig(datasets.BuilderConfig):
    """Builder config for one CMMLU subject, always created at version 1.0.1."""

    def __init__(self, **kwargs):
        """Forward ``kwargs`` to ``datasets.BuilderConfig`` with a fixed version.

        Args:
            **kwargs: Keyword arguments passed through unchanged to the base
                class (this file only ever supplies ``name``).
        """
        pinned_version = datasets.Version("1.0.1")
        super().__init__(version=pinned_version, **kwargs)


class CMMLU(datasets.GeneratorBasedBuilder):
    """Dataset builder for CMMLU with one builder config per subject.

    Each config reads ``test/<subject>.csv`` and ``dev/<subject>.csv`` from the
    extracted ``_URL`` archive and yields four-option multiple-choice examples.
    """

    BUILDER_CONFIGS = [
        CMMLUConfig(
            name=task_name,
        )
        for task_name in task_list
    ]

    def _info(self):
        """Return the dataset metadata and the all-string example schema."""
        features = datasets.Features(
            {
                "question": datasets.Value("string"),
                "A": datasets.Value("string"),
                "B": datasets.Value("string"),
                "C": datasets.Value("string"),
                "D": datasets.Value("string"),
                "answer": datasets.Value("string"),
            }
        )
        return datasets.DatasetInfo(
            description=_DESCRIPTION,
            features=features,
            homepage=_HOMEPAGE,
            license=_LICENSE,
            citation=_CITATION,
        )

    def _split_generators(self, dl_manager):
        """Download and extract the archive, then describe the two splits.

        Args:
            dl_manager: Download manager supplied by ``datasets``; used to
                fetch and unpack ``_URL``.

        Returns:
            A list of two ``datasets.SplitGenerator`` objects, each carrying
            the CSV ``filepath`` for the selected subject.
        """
        data_dir = dl_manager.download_and_extract(_URL)
        task_name = self.config.name
        return [
            datasets.SplitGenerator(
                name=datasets.Split.TEST,
                gen_kwargs={
                    "filepath": os.path.join(data_dir, f"test/{task_name}.csv"),
                },
            ),
            # The "dev" CSVs are published under the TRAIN split name
            # (presumably consumed as few-shot exemplars -- confirm with callers).
            datasets.SplitGenerator(
                name=datasets.Split.TRAIN,
                gen_kwargs={
                    "filepath": os.path.join(data_dir, f"dev/{task_name}.csv"),
                },
            ),
        ]

    def _generate_examples(self, filepath):
        """Yield ``(index, example)`` pairs read from one subject CSV.

        The CSV's first column is used as the row index and is not part of the
        example. The ``Question`` and ``Answer`` columns are renamed to
        ``question`` and ``answer`` (defaulting to ``""`` if a column is
        absent); all remaining columns are passed through under their own names.

        Args:
            filepath: Path to the CSV file for the current split.

        Yields:
            Tuples of the zero-based row position and a dict of string fields.
        """
        # Read every cell verbatim as text. With pandas' defaults, cells such
        # as "NA", "null" or "N/A" are turned into NaN (and later stringified
        # as "nan"), and a column whose cells all look numeric is parsed as
        # numbers, so e.g. "0.50" would come back as "0.5". Both would silently
        # alter the question/option text of a benchmark item.
        df = pd.read_csv(
            filepath,
            header=0,
            index_col=0,
            encoding="utf-8",
            dtype=str,
            keep_default_na=False,
        )
        for i, instance in enumerate(df.to_dict(orient="records")):
            instance["question"] = instance.pop("Question", "")
            instance["answer"] = instance.pop("Answer", "")
            yield i, instance