97 lines
3.6 KiB
Python
97 lines
3.6 KiB
Python
"""LLM client — OpenAI-compatible completions, streaming, and VLM."""
|
|
from __future__ import annotations
|
|
|
|
import json
|
|
import logging
|
|
from typing import AsyncIterator
|
|
|
|
import httpx
|
|
|
|
logger = logging.getLogger("engimind.llm")
|
|
|
|
|
|
class LLMClient:
    """Async client for OpenAI-compatible chat-completion endpoints.

    Wraps one shared ``httpx.AsyncClient`` connection pool and exposes:

    - :meth:`complete` — non-streaming chat completion,
    - :meth:`stream_complete` — SSE streaming completion,
    - :meth:`describe_image` — vision-language (VLM) image description.
    """

    def __init__(self):
        # Single shared pool; generous 120 s timeout because LLM
        # generations can legitimately take that long.
        self._client = httpx.AsyncClient(timeout=120.0)

    @staticmethod
    def _chat_url(base_url: str) -> str:
        """Normalize *base_url* into a ``/v1/chat/completions`` endpoint.

        Accepts bases with or without a trailing slash or ``/v1`` suffix,
        so ``http://h``, ``http://h/`` and ``http://h/v1`` all resolve to
        the same endpoint. (Previously ``complete``/``stream_complete``
        appended ``/v1`` unconditionally and produced ``/v1/v1/...`` when
        the caller's base already ended in ``/v1``.)
        """
        url = base_url.rstrip("/")
        if not url.endswith("/v1"):
            url += "/v1"
        return url + "/chat/completions"

    @staticmethod
    def _headers(api_key: str, *, stream: bool = False) -> dict:
        """Build request headers; Bearer auth only when *api_key* is non-empty."""
        headers = {"Content-Type": "application/json"}
        if stream:
            headers["Accept"] = "text/event-stream"
        if api_key:
            headers["Authorization"] = f"Bearer {api_key}"
        return headers

    async def complete(self, base_url: str, api_key: str, model: str,
                       messages: list[dict]) -> dict:
        """Non-streaming completion.

        Returns the provider's parsed JSON response.
        Raises ``httpx.HTTPStatusError`` on a non-2xx status.
        """
        body = {"model": model, "messages": messages, "stream": False}
        resp = await self._client.post(self._chat_url(base_url), json=body,
                                       headers=self._headers(api_key))
        resp.raise_for_status()
        return resp.json()

    async def stream_complete(self, base_url: str, api_key: str, model: str,
                              messages: list[dict]) -> AsyncIterator[dict]:
        """Streaming completion yielding {'type': 'content'|'thinking', 'text': str}."""
        body = {"model": model, "messages": messages, "stream": True}
        async with self._client.stream(
            "POST", self._chat_url(base_url), json=body,
            headers=self._headers(api_key, stream=True),
        ) as resp:
            resp.raise_for_status()
            async for line in resp.aiter_lines():
                # SSE data lines only; per spec the space after "data:" is
                # optional, so match the bare prefix and strip afterwards.
                if not line.startswith("data:"):
                    continue
                data = line[5:].strip()
                if data == "[DONE]":
                    break
                try:
                    chunk = json.loads(data)
                except json.JSONDecodeError:
                    # Skip malformed / partial lines rather than abort the stream.
                    continue
                if not chunk.get("choices"):
                    continue
                delta = chunk["choices"][0].get("delta", {})
                # Some providers emit chain-of-thought under "reasoning_content".
                if delta.get("reasoning_content"):
                    yield {"type": "thinking", "text": delta["reasoning_content"]}
                if delta.get("content"):
                    yield {"type": "content", "text": delta["content"]}

    async def describe_image(self, base_url: str, api_key: str, model: str,
                             base64_data_uri: str, prompt: str = "") -> str:
        """Send an image to a VLM and return a text description.

        *base64_data_uri* must be a ``data:`` URI; *prompt* defaults to a
        Chinese instruction asking for a detailed description.
        Returns "" when the response carries no choices or a null content.
        """
        if not prompt:
            prompt = "请详细描述这张图片的内容,包括图表数据、文字信息和视觉元素。用中文回答。"

        body = {
            "model": model,
            "messages": [{
                "role": "user",
                "content": [
                    {"type": "image_url", "image_url": {"url": base64_data_uri}},
                    {"type": "text", "text": prompt},
                ],
            }],
            "stream": False,
        }
        resp = await self._client.post(self._chat_url(base_url), json=body,
                                       headers=self._headers(api_key))
        resp.raise_for_status()
        result = resp.json()
        if result.get("choices"):
            # Some providers return content: null — coerce to "" to honor -> str.
            return result["choices"][0]["message"].get("content") or ""
        return ""

    async def close(self):
        """Close the underlying connection pool; call once on shutdown."""
        await self._client.aclose()
|
|
|
|
|
|
# Module-level singleton shared by the whole application.
# NOTE: instantiating it creates an httpx.AsyncClient connection pool at
# import time; the owner of the process should await llm_client.close()
# on shutdown to release it.
llm_client = LLMClient()
|