From 6703d0546d02ea8174552ad04cb0b4bbfd7e0201 Mon Sep 17 00:00:00 2001 From: Mark Mueller Date: Thu, 8 Feb 2024 17:56:18 +0100 Subject: [PATCH] Slim Orca data parsing --- src/llmtuner/data/aligner.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/llmtuner/data/aligner.py b/src/llmtuner/data/aligner.py index d4f281bc..cd3a7ea4 100644 --- a/src/llmtuner/data/aligner.py +++ b/src/llmtuner/data/aligner.py @@ -60,6 +60,7 @@ def convert_sharegpt(examples: Dict[str, List[Any]], dataset_attr: "DatasetAttr" if dataset_attr.system_tag and message[dataset_attr.role_tag] == dataset_attr.system_tag: outputs["system"].append(message[dataset_attr.content_tag]) n_sys = 1 + continue if (turn_idx - n_sys) % 2 == 0: accept_tags = [dataset_attr.user_tag, dataset_attr.observation_tag]