fix stop param

parent bcf7ec5ceb
commit 09f3ef1de4
@@ -459,4 +459,4 @@
     },
     "folder": "python"
   }
 }
@@ -103,7 +103,7 @@ async def create_chat_completion_response(
         top_p=request.top_p,
         max_new_tokens=request.max_tokens,
         num_return_sequences=request.n,
-        stop=request.stop
+        stop=request.stop,
     )
 
     prompt_length, response_length = 0, 0
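For reference, a hedged sketch of how a client exercises the non-streaming path above; the base URL, API key, and model name are placeholders, not part of this commit. The stop value arrives server-side as request.stop and is forwarded as shown in the hunk.

from openai import OpenAI

client = OpenAI(base_url="http://localhost:8000/v1", api_key="empty")  # assumed local server
result = client.chat.completions.create(
    model="test-model",  # placeholder model name
    messages=[{"role": "user", "content": "Count to ten."}],
    max_tokens=64,
    n=1,
    stop=["\n\n"],  # forwarded as stop=request.stop in the call above
)
print(result.choices[0].message.content)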
@@ -145,6 +145,9 @@ async def create_stream_chat_completion_response(
     if tools:
         raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="Cannot stream function calls.")
 
+    if request.n > 1:
+        raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="Cannot stream multiple responses.")
+
     yield _create_stream_chat_completion_chunk(
         completion_id=completion_id, model=request.model, delta=ChatCompletionMessage(role=Role.ASSISTANT, content="")
     )
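Both guards run before the first chunk is yielded, so unsupported combinations fail fast. A standalone sketch of the same checks (the helper is a hypothetical extraction, not the module's API):

from fastapi import HTTPException, status


def validate_stream_request(n, tools):
    # Mirrors the guards above: streaming supports neither tool calls
    # nor multiple parallel completions.
    if tools:
        raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="Cannot stream function calls.")
    if n > 1:
        raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="Cannot stream multiple responses.")


try:
    validate_stream_request(n=2, tools=None)
except HTTPException as exc:
    print(exc.status_code, exc.detail)  # 400 Cannot stream multiple responses.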
@@ -156,7 +159,7 @@ async def create_stream_chat_completion_response(
         temperature=request.temperature,
         top_p=request.top_p,
         max_new_tokens=request.max_tokens,
-        stop=request.stop
+        stop=request.stop,
     ):
         if len(new_token) != 0:
             yield _create_stream_chat_completion_chunk(
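With stop now forwarded on the streaming path as well, a client can pass stop strings in stream mode, assuming the backend engine accepts them (the Huggingface engine below still rejects stop). A hedged client sketch with placeholder URL, key, and model:

from openai import OpenAI

client = OpenAI(base_url="http://localhost:8000/v1", api_key="empty")  # assumed local server
stream = client.chat.completions.create(
    model="test-model",  # placeholder model name
    messages=[{"role": "user", "content": "Write one sentence."}],
    stream=True,
    stop=".",  # forwarded as stop=request.stop in the hunk above
)
for chunk in stream:
    delta = chunk.choices[0].delta.content
    if delta:
        print(delta, end="", flush=True)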
@@ -77,8 +77,8 @@ class ChatCompletionRequest(BaseModel):
     top_p: Optional[float] = None
     n: int = 1
     max_tokens: Optional[int] = None
+    stop: Optional[Union[str, List[str]]] = None
     stream: bool = False
-    stop: Union[Optional[str], List[str]] = None
 
 
 class ChatCompletionResponseChoice(BaseModel):
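One nuance: at runtime the old Union[Optional[str], List[str]] and the new Optional[Union[str, List[str]]] flatten to the same union (str, List[str], or None), so the rewrite is about the conventional spelling and about grouping stop with the other sampling fields. A minimal sketch using a hypothetical stand-in model:

from typing import List, Optional, Union

from pydantic import BaseModel


class StopField(BaseModel):
    # Hypothetical stand-in for the corrected field on ChatCompletionRequest.
    stop: Optional[Union[str, List[str]]] = None


print(StopField(stop="###").stop)            # a single stop string
print(StopField(stop=["###", "\n\n"]).stop)  # a list of stop strings
print(StopField().stop)                      # defaults to None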
@@ -73,6 +73,10 @@ class HuggingfaceEngine(BaseEngine):
         repetition_penalty = input_kwargs.pop("repetition_penalty", None)
         max_length = input_kwargs.pop("max_length", None)
         max_new_tokens = input_kwargs.pop("max_new_tokens", None)
+        stop = input_kwargs.pop("stop", None)
+
+        if stop is not None:
+            raise ValueError("Stop parameter is not supported in Huggingface engine yet.")
 
         generating_args.update(
             dict(
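The engine now consumes stop from the sampling kwargs and rejects a non-None value loudly rather than silently ignoring it. The pop-then-reject logic in isolation (the kwargs dict here is illustrative):

# `stop` is removed from the kwargs either way; a non-None value is refused
# until the Huggingface engine supports stop strings.
input_kwargs = {"max_new_tokens": 64, "stop": "###"}

stop = input_kwargs.pop("stop", None)
try:
    if stop is not None:
        raise ValueError("Stop parameter is not supported in Huggingface engine yet.")
except ValueError as err:
    print(err)
print(input_kwargs)  # {'max_new_tokens': 64}; `stop` is gone from the kwargs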