diff --git a/api/core/model_runtime/model_providers/volcengine_maas/client.py b/api/core/model_runtime/model_providers/volcengine_maas/client.py
index d6f135665..cfe21e4b9 100644
--- a/api/core/model_runtime/model_providers/volcengine_maas/client.py
+++ b/api/core/model_runtime/model_providers/volcengine_maas/client.py
@@ -208,11 +208,9 @@ class ArkClientV3:
             presence_penalty=presence_penalty,
             top_p=top_p,
             temperature=temperature,
+            stream_options={"include_usage": True},
         )
-        for chunk in chunks:
-            if not chunk.choices:
-                continue
-            yield chunk
+        yield from chunks
 
     def embeddings(self, texts: list[str]) -> CreateEmbeddingResponse:
         return self.ark.embeddings.create(model=self.endpoint_id, input=texts)
diff --git a/api/core/model_runtime/model_providers/volcengine_maas/llm/llm.py b/api/core/model_runtime/model_providers/volcengine_maas/llm/llm.py
index f8bf8fb82..dec6c9d78 100644
--- a/api/core/model_runtime/model_providers/volcengine_maas/llm/llm.py
+++ b/api/core/model_runtime/model_providers/volcengine_maas/llm/llm.py
@@ -239,16 +239,14 @@ class VolcengineMaaSLargeLanguageModel(LargeLanguageModel):
 
         def _handle_stream_chat_response(chunks: Generator[ChatCompletionChunk]) -> Generator:
             for chunk in chunks:
-                if not chunk.choices:
-                    continue
-                choice = chunk.choices[0]
-
                 yield LLMResultChunk(
                     model=model,
                     prompt_messages=prompt_messages,
                     delta=LLMResultChunkDelta(
-                        index=choice.index,
-                        message=AssistantPromptMessage(content=choice.delta.content, tool_calls=[]),
+                        index=0,
+                        message=AssistantPromptMessage(
+                            content=chunk.choices[0].delta.content if chunk.choices else "", tool_calls=[]
+                        ),
                         usage=self._calc_response_usage(
                             model=model,
                             credentials=credentials,
@@ -257,7 +255,7 @@ class VolcengineMaaSLargeLanguageModel(LargeLanguageModel):
                         )
                         if chunk.usage
                         else None,
-                        finish_reason=choice.finish_reason,
+                        finish_reason=chunk.choices[0].finish_reason if chunk.choices else None,
                     ),
                 )
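
For context on why the `chunk.choices` guard moves out of `ArkClientV3`: with `stream_options={"include_usage": True}`, OpenAI-compatible streaming endpoints (an interface the Ark SDK mirrors) emit one extra final chunk whose `choices` list is empty and whose `usage` field is populated. Filtering out empty-`choices` chunks in the client, as the old loop did, therefore discards the only chunk that carries token usage; `yield from chunks` forwards it so `_handle_stream_chat_response` can read `chunk.usage`. A minimal consumer sketch of this behavior, assuming the `openai` Python SDK; the base URL, model id, and key are placeholders, not values from this PR:

```python
# Sketch only: shows the usage-only final chunk that the guards in this
# PR account for. Endpoint URL, model id, and key are placeholders.
from openai import OpenAI

client = OpenAI(
    base_url="https://example-ark-endpoint/api/v3",  # hypothetical OpenAI-compatible endpoint
    api_key="YOUR_API_KEY",
)

stream = client.chat.completions.create(
    model="your-endpoint-id",  # placeholder, analogous to ArkClientV3.endpoint_id
    messages=[{"role": "user", "content": "Hello"}],
    stream=True,
    stream_options={"include_usage": True},  # request the trailing usage chunk
)

for chunk in stream:
    if chunk.choices:
        # Ordinary delta chunk: choices[0].delta carries the content.
        print(chunk.choices[0].delta.content or "", end="")
    elif chunk.usage:
        # Final chunk: choices == [] and usage holds the token counts;
        # dropping it (as the removed client-side filter did) loses usage.
        print(f"\nprompt={chunk.usage.prompt_tokens}, completion={chunk.usage.completion_tokens}")
```

This also explains the `chunk.choices[0].x if chunk.choices else ...` guards in `llm.py`: every chunk, including the choice-less usage chunk, now reaches the handler, so each access to `choices[0]` must tolerate an empty list.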