fix eval scripts

2024-05-05 00:53:07 +08:00 · 2024-05-05 00:53:07 +08:00 · 177604fb6b
parent af596988b1
commit 177604fb6b
3 changed files with 74 additions and 86 deletions
--- a/evaluation/ceval/ceval.py
+++ b/evaluation/ceval/ceval.py
@@ -19,7 +19,7 @@ import pandas as pd

 _CITATION = """\
@article{huang2023ceval,
-  title={C-Eval: A Multi-Level Multi-Discipline Chinese Evaluation Suite for Foundation Models}, 
+  title={C-Eval: A Multi-Level Multi-Discipline Chinese Evaluation Suite for Foundation Models},
  author={Huang, Yuzhen and Bai, Yuzhuo and Zhu, Zhihao and Zhang, Junlei and Zhang, Jinghan and Su, Tangjun and Liu, Junteng and Lv, Chuancheng and Zhang, Yikai and Lei, Jiayi and Fu, Yao and Sun, Maosong and He, Junxian},
  journal={arXiv preprint arXiv:2305.08322},
  year={2023}
@@ -133,25 +133,19 @@ class Ceval(datasets.GeneratorBasedBuilder):
            datasets.SplitGenerator(
                name=datasets.Split.TEST,
                gen_kwargs={
-                    "filepath": os.path.join(
-                        data_dir, "test", f"{task_name}_test.csv"
-                    ),
+                    "filepath": os.path.join(data_dir, "test", f"{task_name}_test.csv"),
                },
            ),
            datasets.SplitGenerator(
                name=datasets.Split.VALIDATION,
                gen_kwargs={
-                    "filepath": os.path.join(
-                        data_dir, "val", f"{task_name}_val.csv"
-                    ),
+                    "filepath": os.path.join(data_dir, "val", f"{task_name}_val.csv"),
                },
            ),
            datasets.SplitGenerator(
                name=datasets.Split.TRAIN,
                gen_kwargs={
-                    "filepath": os.path.join(
-                        data_dir, "dev", f"{task_name}_dev.csv"
-                    ),
+                    "filepath": os.path.join(data_dir, "dev", f"{task_name}_dev.csv"),
                },
            ),
        ]
--- a/evaluation/cmmlu/cmmlu.py
+++ b/evaluation/cmmlu/cmmlu.py
@@ -37,73 +37,73 @@ _LICENSE = "Creative Commons Attribution-NonCommercial-ShareAlike 4.0 Internatio
 _URL = "cmmlu.zip"

 task_list = [
-     'agronomy',
-     'anatomy',
-     'ancient_chinese',
-     'arts',
-     'astronomy',
-     'business_ethics',
-     'chinese_civil_service_exam',
-     'chinese_driving_rule',
-     'chinese_food_culture',
-     'chinese_foreign_policy',
-     'chinese_history',
-     'chinese_literature',
-     'chinese_teacher_qualification',
-     'clinical_knowledge',
-     'college_actuarial_science',
-     'college_education',
-     'college_engineering_hydrology',
-     'college_law',
-     'college_mathematics',
-     'college_medical_statistics',
-     'college_medicine',
-     'computer_science',
-     'computer_security',
-     'conceptual_physics',
-     'construction_project_management',
-     'economics',
-     'education',
-     'electrical_engineering',
-     'elementary_chinese',
-     'elementary_commonsense',
-     'elementary_information_and_technology',
-     'elementary_mathematics',
-     'ethnology',
-     'food_science',
-     'genetics',
-     'global_facts',
-     'high_school_biology',
-     'high_school_chemistry',
-     'high_school_geography',
-     'high_school_mathematics',
-     'high_school_physics',
-     'high_school_politics',
-     'human_sexuality',
-     'international_law',
-     'journalism',
-     'jurisprudence',
-     'legal_and_moral_basis',
-     'logical',
-     'machine_learning',
-     'management',
-     'marketing',
-     'marxist_theory',
-     'modern_chinese',
-     'nutrition',
-     'philosophy',
-     'professional_accounting',
-     'professional_law',
-     'professional_medicine',
-     'professional_psychology',
-     'public_relations',
-     'security_study',
-     'sociology',
-     'sports_science',
-     'traditional_chinese_medicine',
-     'virology',
-     'world_history',
-     'world_religions',
+    "agronomy",
+    "anatomy",
+    "ancient_chinese",
+    "arts",
+    "astronomy",
+    "business_ethics",
+    "chinese_civil_service_exam",
+    "chinese_driving_rule",
+    "chinese_food_culture",
+    "chinese_foreign_policy",
+    "chinese_history",
+    "chinese_literature",
+    "chinese_teacher_qualification",
+    "clinical_knowledge",
+    "college_actuarial_science",
+    "college_education",
+    "college_engineering_hydrology",
+    "college_law",
+    "college_mathematics",
+    "college_medical_statistics",
+    "college_medicine",
+    "computer_science",
+    "computer_security",
+    "conceptual_physics",
+    "construction_project_management",
+    "economics",
+    "education",
+    "electrical_engineering",
+    "elementary_chinese",
+    "elementary_commonsense",
+    "elementary_information_and_technology",
+    "elementary_mathematics",
+    "ethnology",
+    "food_science",
+    "genetics",
+    "global_facts",
+    "high_school_biology",
+    "high_school_chemistry",
+    "high_school_geography",
+    "high_school_mathematics",
+    "high_school_physics",
+    "high_school_politics",
+    "human_sexuality",
+    "international_law",
+    "journalism",
+    "jurisprudence",
+    "legal_and_moral_basis",
+    "logical",
+    "machine_learning",
+    "management",
+    "marketing",
+    "marxist_theory",
+    "modern_chinese",
+    "nutrition",
+    "philosophy",
+    "professional_accounting",
+    "professional_law",
+    "professional_medicine",
+    "professional_psychology",
+    "public_relations",
+    "security_study",
+    "sociology",
+    "sports_science",
+    "traditional_chinese_medicine",
+    "virology",
+    "world_history",
+    "world_religions",
 ]


--- a/evaluation/mmlu/mmlu.py
+++ b/evaluation/mmlu/mmlu.py
@@ -136,25 +136,19 @@ class MMLU(datasets.GeneratorBasedBuilder):
            datasets.SplitGenerator(
                name=datasets.Split.TEST,
                gen_kwargs={
-                    "filepath": os.path.join(
-                        data_dir, "data", "test", f"{task_name}_test.csv"
-                    ),
+                    "filepath": os.path.join(data_dir, "data", "test", f"{task_name}_test.csv"),
                },
            ),
            datasets.SplitGenerator(
                name=datasets.Split.VALIDATION,
                gen_kwargs={
-                    "filepath": os.path.join(
-                        data_dir, "data", "val", f"{task_name}_val.csv"
-                    ),
+                    "filepath": os.path.join(data_dir, "data", "val", f"{task_name}_val.csv"),
                },
            ),
            datasets.SplitGenerator(
                name=datasets.Split.TRAIN,
                gen_kwargs={
-                    "filepath": os.path.join(
-                        data_dir, "data", "dev", f"{task_name}_dev.csv"
-                    ),
+                    "filepath": os.path.join(data_dir, "data", "dev", f"{task_name}_dev.csv"),
                },
            ),
        ]