Merge pull request #3794 from jue-jue-zi/main

feat: pass the `max_lora_rank` parameter to vLLM backend
commit d7ff49f245
hoshi-hiyouga authored 2024-05-17 16:17:30 +08:00, committed by GitHub
2 changed files with 5 additions and 0 deletions

@@ -59,6 +59,7 @@ class VllmEngine(BaseEngine):
             "disable_log_requests": True,
             "enforce_eager": model_args.vllm_enforce_eager,
             "enable_lora": model_args.adapter_name_or_path is not None,
+            "max_lora_rank": model_args.vllm_max_lora_rank,
         }
         if model_args.visual_inputs:
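
For context, the dict shown in this hunk is the keyword-argument set that `VllmEngine` forwards to vLLM when it builds its async engine. The sketch below is a minimal, hypothetical illustration of that hand-off, assuming vLLM's `AsyncEngineArgs`/`AsyncLLMEngine` API; the base model name and concrete values are placeholders, not the actual LLaMA-Factory code.

```python
# Minimal sketch, not the actual VllmEngine implementation.
# Assumes vLLM's AsyncEngineArgs / AsyncLLMEngine API; the model name and
# values below are hypothetical placeholders.
from vllm import AsyncEngineArgs, AsyncLLMEngine

infer_args = {
    "model": "meta-llama/Llama-2-7b-hf",  # placeholder base model
    "trust_remote_code": True,
    "enforce_eager": False,
    "enable_lora": True,           # an adapter path was supplied
    "max_lora_rank": 32,           # must cover the rank of any adapter served
}

# vLLM sizes its LoRA buffers from max_lora_rank and rejects adapters whose
# rank exceeds it, which is why LLaMA-Factory now passes the value through
# instead of relying on vLLM's built-in default.
engine = AsyncLLMEngine.from_engine_args(AsyncEngineArgs(**infer_args))
```

Note that vLLM releases from around this time accept only a small set of rank caps (8, 16, 32, 64), so an out-of-range value fails at engine construction rather than at request time.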

@@ -117,6 +117,10 @@ class ModelArguments:
         default=False,
         metadata={"help": "Whether or not to disable CUDA graph in the vLLM engine."},
     )
+    vllm_max_lora_rank: int = field(
+        default=8,
+        metadata={"help": "Maximum rank of all LoRAs in the vLLM engine."},
+    )
     offload_folder: str = field(
         default="offload",
         metadata={"help": "Path to offload model weights."},