diff --git a/src/llmtuner/dsets/preprocess.py b/src/llmtuner/dsets/preprocess.py index 0562d303..2a25805e 100644 --- a/src/llmtuner/dsets/preprocess.py +++ b/src/llmtuner/dsets/preprocess.py @@ -119,10 +119,10 @@ def preprocess_dataset( print("input_ids:\n{}".format(example["input_ids"])) print("inputs:\n{}".format(tokenizer.decode(example["input_ids"], skip_special_tokens=False))) print("label_ids:\n{}".format(example["labels"])) - print("labels:\n{}".format( - tokenizer.decode([d if d != IGNORE_INDEX else tokenizer.pad_token_id for d in example["labels"]], - skip_special_tokens=False) - )) + print("labels:\n{}".format(''.join([ + tokenizer.decode(d, skip_special_tokens=False) + if d != IGNORE_INDEX else '-100' for d in example["labels"] + ]))) def print_pairwise_dataset_example(example): print("accept_ids:\n{}".format(example["accept_ids"]))