97 lines
3.6 KiB
Python
97 lines
3.6 KiB
Python
"""LLM client — OpenAI-compatible completions, streaming, and VLM."""
|
|
from __future__ import annotations
|
|
|
|
import json
|
|
import logging
|
|
from typing import AsyncIterator
|
|
|
|
import httpx
|
|
|
|
logger = logging.getLogger("engimind.llm")
|
|
|
|
|
|
class LLMClient:
    """Async client for OpenAI-compatible chat-completion endpoints.

    Wraps one shared ``httpx.AsyncClient`` connection pool and exposes:

    - :meth:`complete` — non-streaming chat completion,
    - :meth:`stream_complete` — SSE streaming completion,
    - :meth:`describe_image` — vision-language (VLM) image description.
    """

    def __init__(self):
        # Single shared pool; generous 120 s timeout because LLM
        # generations can legitimately take that long.
        self._client = httpx.AsyncClient(timeout=120.0)

    @staticmethod
    def _chat_url(base_url: str) -> str:
        """Normalize *base_url* into a ``/v1/chat/completions`` endpoint.

        Accepts bases with or without a trailing slash or ``/v1`` suffix,
        so ``http://h``, ``http://h/`` and ``http://h/v1`` all resolve to
        the same endpoint. (Previously ``complete``/``stream_complete``
        appended ``/v1`` unconditionally and produced ``/v1/v1/...`` when
        the caller's base already ended in ``/v1``.)
        """
        url = base_url.rstrip("/")
        if not url.endswith("/v1"):
            url += "/v1"
        return url + "/chat/completions"

    @staticmethod
    def _headers(api_key: str, *, stream: bool = False) -> dict:
        """Build request headers; Bearer auth only when *api_key* is non-empty."""
        headers = {"Content-Type": "application/json"}
        if stream:
            headers["Accept"] = "text/event-stream"
        if api_key:
            headers["Authorization"] = f"Bearer {api_key}"
        return headers

    async def complete(self, base_url: str, api_key: str, model: str,
                       messages: list[dict]) -> dict:
        """Non-streaming completion.

        Returns the provider's parsed JSON response.
        Raises ``httpx.HTTPStatusError`` on a non-2xx status.
        """
        body = {"model": model, "messages": messages, "stream": False}
        resp = await self._client.post(self._chat_url(base_url), json=body,
                                       headers=self._headers(api_key))
        resp.raise_for_status()
        return resp.json()

    async def stream_complete(self, base_url: str, api_key: str, model: str,
                              messages: list[dict]) -> AsyncIterator[dict]:
        """Streaming completion yielding {'type': 'content'|'thinking', 'text': str}."""
        body = {"model": model, "messages": messages, "stream": True}
        async with self._client.stream(
            "POST", self._chat_url(base_url), json=body,
            headers=self._headers(api_key, stream=True),
        ) as resp:
            resp.raise_for_status()
            async for line in resp.aiter_lines():
                # SSE data lines only; per spec the space after "data:" is
                # optional, so match the bare prefix and strip afterwards.
                if not line.startswith("data:"):
                    continue
                data = line[5:].strip()
                if data == "[DONE]":
                    break
                try:
                    chunk = json.loads(data)
                except json.JSONDecodeError:
                    # Skip malformed / partial lines rather than abort the stream.
                    continue
                if not chunk.get("choices"):
                    continue
                delta = chunk["choices"][0].get("delta", {})
                # Some providers emit chain-of-thought under "reasoning_content".
                if delta.get("reasoning_content"):
                    yield {"type": "thinking", "text": delta["reasoning_content"]}
                if delta.get("content"):
                    yield {"type": "content", "text": delta["content"]}

    async def describe_image(self, base_url: str, api_key: str, model: str,
                             base64_data_uri: str, prompt: str = "") -> str:
        """Send an image to a VLM and return a text description.

        *base64_data_uri* must be a ``data:`` URI; *prompt* defaults to a
        Chinese instruction asking for a detailed description.
        Returns "" when the response carries no choices or a null content.
        """
        if not prompt:
            prompt = "请详细描述这张图片的内容,包括图表数据、文字信息和视觉元素。用中文回答。"

        body = {
            "model": model,
            "messages": [{
                "role": "user",
                "content": [
                    {"type": "image_url", "image_url": {"url": base64_data_uri}},
                    {"type": "text", "text": prompt},
                ],
            }],
            "stream": False,
        }
        resp = await self._client.post(self._chat_url(base_url), json=body,
                                       headers=self._headers(api_key))
        resp.raise_for_status()
        result = resp.json()
        if result.get("choices"):
            # Some providers return content: null — coerce to "" to honor -> str.
            return result["choices"][0]["message"].get("content") or ""
        return ""

    async def close(self):
        """Close the underlying connection pool; call once on shutdown."""
        await self._client.aclose()
|
|
|
|
|
|
# Module-level singleton shared by the whole application.
# NOTE: instantiating it creates an httpx.AsyncClient connection pool at
# import time; the owner of the process should await llm_client.close()
# on shutdown to release it.
llm_client = LLMClient()
|