From 1afa51c2fa9839056644803eedef4e9d1af0d51e Mon Sep 17 00:00:00 2001 From: hiyouga Date: Sat, 5 Aug 2023 13:42:42 +0800 Subject: [PATCH] fix template for tiktoken --- src/llmtuner/extras/template.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/llmtuner/extras/template.py b/src/llmtuner/extras/template.py index 8a01bb85..066f6c79 100644 --- a/src/llmtuner/extras/template.py +++ b/src/llmtuner/extras/template.py @@ -102,6 +102,7 @@ class Template: for elem in context: if isinstance(elem, str): elem = elem.replace("{{query}}", query, 1) + elem = elem.replace("", "[MASK]") token_ids = token_ids + tokenizer.encode(elem, add_special_tokens=False) elif isinstance(elem, dict): token_ids = token_ids + [tokenizer.convert_tokens_to_ids(elem.get("token"))]