From 091010492b2afc8ad25359323aba760523a4c5af Mon Sep 17 00:00:00 2001 From: hiyouga <467089858@qq.com> Date: Wed, 24 Jul 2024 17:00:29 +0800 Subject: [PATCH] fix #4928 --- src/llamafactory/data/aligner.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/llamafactory/data/aligner.py b/src/llamafactory/data/aligner.py index 299bdca3..ef70d75b 100644 --- a/src/llamafactory/data/aligner.py +++ b/src/llamafactory/data/aligner.py @@ -120,15 +120,15 @@ def convert_sharegpt( even_tags = (dataset_attr.assistant_tag, dataset_attr.function_tag) accept_tags = (odd_tags, even_tags) for i, messages in enumerate(examples[dataset_attr.messages]): + if len(messages) == 0: + continue + if dataset_attr.system_tag and messages[0][dataset_attr.role_tag] == dataset_attr.system_tag: system = messages[0][dataset_attr.content_tag] messages = messages[1:] else: system = examples[dataset_attr.system][i] if dataset_attr.system else "" - if len(messages) == 0: - continue - aligned_messages = [] broken_data = False for turn_idx, message in enumerate(messages):