From 3b27826bddcbc5830e1f973fd0291e9b6952f2c1 Mon Sep 17 00:00:00 2001 From: "yuan.wang" Date: Wed, 11 Feb 2026 15:14:29 +0800 Subject: [PATCH 1/2] fix: llm extra body bug --- src/memos/api/config.py | 4 ++-- src/memos/api/handlers/config_builders.py | 1 + src/memos/configs/llm.py | 1 + src/memos/llms/vllm.py | 16 ++-------------- 4 files changed, 6 insertions(+), 16 deletions(-) diff --git a/src/memos/api/config.py b/src/memos/api/config.py index 70d9366e3..27f77b6ab 100644 --- a/src/memos/api/config.py +++ b/src/memos/api/config.py @@ -335,7 +335,7 @@ def get_memreader_config() -> dict[str, Any]: # validation requirements during tests/import. "api_base": os.getenv("MEMRADER_API_BASE", "https://api.openai.com/v1"), "remove_think_prefix": True, - "extra_body": {"chat_template_kwargs": {"enable_thinking": False}}, + "extra_body": {"enable_thinking": False}, }, } @@ -531,7 +531,7 @@ def get_internet_config() -> dict[str, Any]: "api_key": os.getenv("MEMRADER_API_KEY", "EMPTY"), "api_base": os.getenv("MEMRADER_API_BASE"), "remove_think_prefix": True, - "extra_body": {"chat_template_kwargs": {"enable_thinking": False}}, + "extra_body": {"enable_thinking": False}, }, }, "embedder": APIConfig.get_embedder_config(), diff --git a/src/memos/api/handlers/config_builders.py b/src/memos/api/handlers/config_builders.py index 2b3fbdd35..7426a60ba 100644 --- a/src/memos/api/handlers/config_builders.py +++ b/src/memos/api/handlers/config_builders.py @@ -105,6 +105,7 @@ def build_chat_llm_config() -> list[dict[str, Any]]: } ), "support_models": cfg.get("support_models", None), + "extra_body": cfg.get("extra_body", None), } for cfg in configs ] diff --git a/src/memos/configs/llm.py b/src/memos/configs/llm.py index 70217b896..5487d117c 100644 --- a/src/memos/configs/llm.py +++ b/src/memos/configs/llm.py @@ -116,6 +116,7 @@ class VLLMLLMConfig(BaseLLMConfig): default=False, description="Enable reasoning outputs from vLLM", ) + extra_body: Any = Field(default=None, description="Extra options for API") class LLMConfigFactory(BaseConfig): diff --git a/src/memos/llms/vllm.py b/src/memos/llms/vllm.py index 362112f11..0efead97b 100644 --- a/src/memos/llms/vllm.py +++ b/src/memos/llms/vllm.py @@ -111,13 +111,7 @@ def _generate_with_api_client(self, messages: list[MessageDict], **kwargs) -> st "temperature": kwargs.get("temperature", self.config.temperature), "max_tokens": kwargs.get("max_tokens", self.config.max_tokens), "top_p": kwargs.get("top_p", self.config.top_p), - "extra_body": { - "chat_template_kwargs": { - "enable_thinking": kwargs.get( - "enable_thinking", self.config.enable_thinking - ) - } - }, + "extra_body": kwargs.get("extra_body", self.config.extra_body), } if kwargs.get("tools"): completion_kwargs["tools"] = kwargs.get("tools") @@ -175,13 +169,7 @@ def generate_stream(self, messages: list[MessageDict], **kwargs): "max_tokens": kwargs.get("max_tokens", self.config.max_tokens), "top_p": kwargs.get("top_p", self.config.top_p), "stream": True, - "extra_body": { - "chat_template_kwargs": { - "enable_thinking": kwargs.get( - "enable_thinking", self.config.enable_thinking - ) - } - }, + "extra_body": kwargs.get("extra_body", self.config.extra_body), } stream = self.client.chat.completions.create(**completion_kwargs) From cd5272b2fa948e8b37f4cede5a02aa1550297976 Mon Sep 17 00:00:00 2001 From: "yuan.wang" Date: Thu, 12 Feb 2026 10:36:24 +0800 Subject: [PATCH 2/2] fix: fix bug --- src/memos/api/config.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/memos/api/config.py b/src/memos/api/config.py index 27f77b6ab..d2dd19266 100644 --- a/src/memos/api/config.py +++ b/src/memos/api/config.py @@ -335,7 +335,6 @@ def get_memreader_config() -> dict[str, Any]: # validation requirements during tests/import. "api_base": os.getenv("MEMRADER_API_BASE", "https://api.openai.com/v1"), "remove_think_prefix": True, - "extra_body": {"enable_thinking": False}, }, } @@ -531,7 +530,6 @@ def get_internet_config() -> dict[str, Any]: "api_key": os.getenv("MEMRADER_API_KEY", "EMPTY"), "api_base": os.getenv("MEMRADER_API_BASE"), "remove_think_prefix": True, - "extra_body": {"enable_thinking": False}, }, }, "embedder": APIConfig.get_embedder_config(),