fix stop param

parent bcf7ec5ceb
commit 09f3ef1de4
@@ -459,4 +459,4 @@
     },
     "folder": "python"
   }
 }
@@ -103,7 +103,7 @@ async def create_chat_completion_response(
         top_p=request.top_p,
         max_new_tokens=request.max_tokens,
         num_return_sequences=request.n,
-        stop=request.stop
+        stop=request.stop,
     )
 
     prompt_length, response_length = 0, 0
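For reference, a hedged sketch of how a client exercises the non-streaming path above; the base URL, API key, and model name are placeholders, not part of this commit. The stop value arrives server-side as request.stop and is forwarded as shown in the hunk.

from openai import OpenAI

client = OpenAI(base_url="http://localhost:8000/v1", api_key="empty")  # assumed local server
result = client.chat.completions.create(
    model="test-model",  # placeholder model name
    messages=[{"role": "user", "content": "Count to ten."}],
    max_tokens=64,
    n=1,
    stop=["\n\n"],  # forwarded as stop=request.stop in the call above
)
print(result.choices[0].message.content)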
@@ -145,6 +145,9 @@ async def create_stream_chat_completion_response(
     if tools:
         raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="Cannot stream function calls.")
 
+    if request.n > 1:
+        raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="Cannot stream multiple responses.")
+
     yield _create_stream_chat_completion_chunk(
         completion_id=completion_id, model=request.model, delta=ChatCompletionMessage(role=Role.ASSISTANT, content="")
     )
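Both guards run before the first chunk is yielded, so unsupported combinations fail fast. A standalone sketch of the same checks (the helper is a hypothetical extraction, not the module's API):

from fastapi import HTTPException, status


def validate_stream_request(n, tools):
    # Mirrors the guards above: streaming supports neither tool calls
    # nor multiple parallel completions.
    if tools:
        raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="Cannot stream function calls.")
    if n > 1:
        raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="Cannot stream multiple responses.")


try:
    validate_stream_request(n=2, tools=None)
except HTTPException as exc:
    print(exc.status_code, exc.detail)  # 400 Cannot stream multiple responses.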
@@ -156,7 +159,7 @@ async def create_stream_chat_completion_response(
         temperature=request.temperature,
         top_p=request.top_p,
         max_new_tokens=request.max_tokens,
-        stop=request.stop
+        stop=request.stop,
     ):
         if len(new_token) != 0:
             yield _create_stream_chat_completion_chunk(
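With stop now forwarded on the streaming path as well, a client can pass stop strings in stream mode, assuming the backend engine accepts them (the Huggingface engine below still rejects stop). A hedged client sketch with placeholder URL, key, and model:

from openai import OpenAI

client = OpenAI(base_url="http://localhost:8000/v1", api_key="empty")  # assumed local server
stream = client.chat.completions.create(
    model="test-model",  # placeholder model name
    messages=[{"role": "user", "content": "Write one sentence."}],
    stream=True,
    stop=".",  # forwarded as stop=request.stop in the hunk above
)
for chunk in stream:
    delta = chunk.choices[0].delta.content
    if delta:
        print(delta, end="", flush=True)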
@@ -77,8 +77,8 @@ class ChatCompletionRequest(BaseModel):
     top_p: Optional[float] = None
     n: int = 1
     max_tokens: Optional[int] = None
+    stop: Optional[Union[str, List[str]]] = None
     stream: bool = False
-    stop: Union[Optional[str], List[str]] = None
 
 
 class ChatCompletionResponseChoice(BaseModel):
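One nuance: at runtime the old Union[Optional[str], List[str]] and the new Optional[Union[str, List[str]]] flatten to the same union (str, List[str], or None), so the rewrite is about the conventional spelling and about grouping stop with the other sampling fields. A minimal sketch using a hypothetical stand-in model:

from typing import List, Optional, Union

from pydantic import BaseModel


class StopField(BaseModel):
    # Hypothetical stand-in for the corrected field on ChatCompletionRequest.
    stop: Optional[Union[str, List[str]]] = None


print(StopField(stop="###").stop)            # a single stop string
print(StopField(stop=["###", "\n\n"]).stop)  # a list of stop strings
print(StopField().stop)                      # defaults to None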
@@ -73,6 +73,10 @@ class HuggingfaceEngine(BaseEngine):
         repetition_penalty = input_kwargs.pop("repetition_penalty", None)
         max_length = input_kwargs.pop("max_length", None)
         max_new_tokens = input_kwargs.pop("max_new_tokens", None)
+        stop = input_kwargs.pop("stop", None)
+
+        if stop is not None:
+            raise ValueError("Stop parameter is not supported in Huggingface engine yet.")
 
         generating_args.update(
             dict(
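The engine now consumes stop from the sampling kwargs and rejects a non-None value loudly rather than silently ignoring it. The pop-then-reject logic in isolation (the kwargs dict here is illustrative):

# `stop` is removed from the kwargs either way; a non-None value is refused
# until the Huggingface engine supports stop strings.
input_kwargs = {"max_new_tokens": 64, "stop": "###"}

stop = input_kwargs.pop("stop", None)
try:
    if stop is not None:
        raise ValueError("Stop parameter is not supported in Huggingface engine yet.")
except ValueError as err:
    print(err)
print(input_kwargs)  # {'max_new_tokens': 64}; `stop` is gone from the kwargs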