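fix bugs: put chat model in eval mode, wrap stop-word eos id in a list, drop explicit llm_int8_threshold from 8-bit config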

2023-08-08 17:55:55 +08:00 · 2023-08-08 17:55:55 +08:00 · 4b841a6b35
parent a9980617f5
commit 4b841a6b35
3 changed files with 3 additions and 5 deletions
--- a/src/llmtuner/chat/stream_chat.py
+++ b/src/llmtuner/chat/stream_chat.py
@@ -15,6 +15,7 @@ class ChatModel:
        model_args, data_args, finetuning_args, self.generating_args = get_infer_args(args)
        self.model, self.tokenizer = load_model_and_tokenizer(model_args, finetuning_args)
        self.model = dispatch_model(self.model)
+        self.model = self.model.eval() # change to eval mode
        self.template = get_template(data_args.template)
        self.source_prefix = data_args.source_prefix
        self.stop_ids = self.tokenizer.convert_tokens_to_ids(self.template.stop_words)
--- a/src/llmtuner/extras/template.py
+++ b/src/llmtuner/extras/template.py
@@ -75,7 +75,7 @@ class Template:
        if tokenizer.eos_token_id and getattr(tokenizer, "add_eos_token", False):
            eos_ids = [tokenizer.eos_token_id]
        else: # use the first stop word as the eos token
-            eos_ids = tokenizer.convert_tokens_to_ids(self.stop_words[0])
+            eos_ids = [tokenizer.convert_tokens_to_ids(self.stop_words[0])]

        return bos_ids, eos_ids

--- a/src/llmtuner/tuner/core/loader.py
+++ b/src/llmtuner/tuner/core/loader.py
@@ -84,10 +84,7 @@ def load_model_and_tokenizer(
        if model_args.quantization_bit == 8:
            require_version("bitsandbytes>=0.37.0", "To fix: pip install bitsandbytes>=0.37.0")
            config_kwargs["load_in_8bit"] = True
-            config_kwargs["quantization_config"] = BitsAndBytesConfig(
-                load_in_8bit=True,
-                llm_int8_threshold=6.0
-            )
+            config_kwargs["quantization_config"] = BitsAndBytesConfig(load_in_8bit=True)

        elif model_args.quantization_bit == 4:
            require_version("bitsandbytes>=0.39.0", "To fix: pip install bitsandbytes>=0.39.0")