fix #4022

2024-06-03 18:38:36 +08:00 · 2024-06-03 18:38:36 +08:00 · 24e1c0e2ee
parent 876bc92865
commit 24e1c0e2ee
3 changed files with 3 additions and 5 deletions
--- a/setup.py
+++ b/setup.py
@ -25,7 +25,7 @@ extra_require = {
    "metrics": ["nltk", "jieba", "rouge-chinese"],
    "deepspeed": ["deepspeed>=0.10.0,<=0.14.0"],
    "bitsandbytes": ["bitsandbytes>=0.39.0"],
-    "vllm": ["vllm>=0.4.1"],
+    "vllm": ["vllm>=0.4.3"],
    "galore": ["galore-torch"],
    "badam": ["badam"],
    "gptq": ["optimum>=1.16.0", "auto-gptq>=0.5.0"],
--- a/src/llamafactory/chat/vllm_engine.py
+++ b/src/llamafactory/chat/vllm_engine.py
@ -158,12 +158,10 @@ class VllmEngine(BaseEngine):
        )

        result_generator = self.model.generate(
-            prompt=None,
+            inputs={"prompt_token_ids": prompt_ids, "multi_modal_data": multi_modal_data},
            sampling_params=sampling_params,
            request_id=request_id,
-            prompt_token_ids=prompt_ids,
            lora_request=self.lora_request,
-            multi_modal_data=multi_modal_data,
        )
        return result_generator

--- a/src/llamafactory/hparams/parser.py
+++ b/src/llamafactory/hparams/parser.py
@ -94,7 +94,7 @@ def _check_extra_dependencies(
        require_version("mixture-of-depth>=1.1.6", "To fix: pip install mixture-of-depth>=1.1.6")

    if model_args.infer_backend == "vllm":
-        require_version("vllm>=0.4.1", "To fix: pip install vllm>=0.4.1")
+        require_version("vllm>=0.4.3", "To fix: pip install vllm>=0.4.3")

    if finetuning_args.use_galore:
        require_version("galore_torch", "To fix: pip install galore_torch")