diff --git a/src/llmtuner/api/app.py b/src/llmtuner/api/app.py
index 856b936e..4ac08608 100644
--- a/src/llmtuner/api/app.py
+++ b/src/llmtuner/api/app.py
@@ -128,7 +128,7 @@ def create_app(chat_model: "ChatModel") -> "FastAPI":
     async def predict(query: str, history: List[Tuple[str, str]], system: str, request: ChatCompletionRequest):
         choice_data = ChatCompletionResponseStreamChoice(
             index=0,
-            delta=DeltaMessage(role=Role.ASSISTANT),
+            delta=DeltaMessage(role=Role.ASSISTANT, content=""),
             finish_reason=None
         )
         chunk = ChatCompletionStreamResponse(model=request.model, choices=[choice_data])