fix template for tiktoken

This commit is contained in:
hiyouga 2023-08-05 13:42:42 +08:00
parent 53d95725c5
commit 1afa51c2fa
1 changed file with 1 addition and 0 deletions

View File

@@ -102,6 +102,7 @@ class Template:
for elem in context: for elem in context:
if isinstance(elem, str): if isinstance(elem, str):
elem = elem.replace("{{query}}", query, 1) elem = elem.replace("{{query}}", query, 1)
elem = elem.replace("<mask>", "[MASK]")
token_ids = token_ids + tokenizer.encode(elem, add_special_tokens=False) token_ids = token_ids + tokenizer.encode(elem, add_special_tokens=False)
elif isinstance(elem, dict): elif isinstance(elem, dict):
token_ids = token_ids + [tokenizer.convert_tokens_to_ids(elem.get("token"))] token_ids = token_ids + [tokenizer.convert_tokens_to_ids(elem.get("token"))]