This commit is contained in:
hiyouga 2023-09-14 18:37:34 +08:00
parent 8857e45602
commit 8632bff811
1 changed file with 3 additions and 3 deletions

View File

@@ -140,9 +140,9 @@ def preprocess_dataset(
 print("input_ids:\n{}".format(example["input_ids"]))
 print("inputs:\n{}".format(tokenizer.decode(example["input_ids"], skip_special_tokens=False)))
 print("label_ids:\n{}".format(example["labels"]))
-print("labels:\n{}".format(tokenizer.decode([
-token_id if token_id != IGNORE_INDEX else tokenizer.pad_token_id for token_id in example["labels"]
-], skip_special_tokens=False)))
+print("labels:\n{}".format(
+tokenizer.decode(list(filter(lambda x: x != IGNORE_INDEX, example["labels"])), skip_special_tokens=False)
+))
 def print_pairwise_dataset_example(example):
 print("prompt_ids:\n{}".format(example["prompt_ids"]))