3c65189f30
后端从占位回显变为真实生成:管理员经控制面登记/激活模型,Gateway 经 NATS 下发,Dispatcher 热更新 LLM Pool,Eino 图用 OpenAI 兼容流式真实推理。 - shared: contract.ModelConfig(provider/base_url/api_key/model) + 配置 subjects; bus.RequestModelConfig/ServeModelConfig/Publish/Subscribe ModelConfigUpdated - gateway: store.LLMModel→sundynix_model(AutoMigrate,唯一激活) + admin REST (GET/POST/active/delete/test models, api_key 脱敏) + main ServeModelConfig + 变更广播; 路由 /api/v1/admin/models* - dispatcher: llm.Pool OpenAI 兼容 SSE 流式客户端(ChatStream) + 热更新配置 + 未配置则降级桩; poolModel.Ready()?真实流式:注入记忆的桩; main 取配置+订阅 - 开发期接在线 API 不拉本地模型(见 llm-provider-strategy memory) - 验证: 4 模块 build✓ + e2e PASS; mock OpenAI 服务 live 跑通——登记/测试连接✓/ 激活→NATS 热更新→提交→真实 SSE 流出 mock 回复, mock 日志证明端点被调用且 注入画像(老王)进了模型上下文 Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
60 lines
2.4 KiB
Python
60 lines
2.4 KiB
Python
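#!/usr/bin/env python3
"""Minimal OpenAI-compatible mock server, used to exercise the LLM Pool streaming path when no real API key is available.

- GET  /models           -> 200 (backs the "test connection" check)
- POST /chat/completions -> SSE stream; the reply echoes the injected profile, showing that the memory really reached the model context.

Usage: python3 scripts/mock_openai.py 11999
"""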
|
||
import json
|
||
import sys
|
||
import time
|
||
from http.server import BaseHTTPRequestHandler, HTTPServer
|
||
|
||
|
||
class H(BaseHTTPRequestHandler):
    """Request handler for a minimal OpenAI-compatible mock endpoint.

    ``GET <anything>/models`` answers with a one-entry JSON model list.
    Every ``POST`` is treated as a chat-completion request and answered
    with a ``text/event-stream`` body that streams a canned reply one
    character per SSE event, followed by ``data: [DONE]``. The reply
    echoes the salutation found in the request's system message.
    """

    # Pause before the first SSE event (simulated time-to-first-token, which
    # also gives an SSE client time to subscribe) and between events, in
    # seconds. Kept as class attributes so a subclass or an instance can
    # shorten them without touching the handler logic.
    ttft_delay = 0.8
    chunk_delay = 0.02

    def log_message(self, *a):
        """Suppress the default per-request access log."""
        pass

    def do_GET(self):
        """Serve the model list on paths ending in ``/models``; 404 otherwise."""
        if not self.path.endswith("/models"):
            self.send_response(404)
            self.end_headers()
            return
        body = json.dumps({"object": "list", "data": [{"id": "mock-model"}]}).encode()
        self.send_response(200)
        self.send_header("Content-Type", "application/json")
        self.send_header("Content-Length", str(len(body)))
        self.end_headers()
        self.wfile.write(body)

    @staticmethod
    def _system_text(msgs):
        """Return the text of the first system message in *msgs*.

        Returns an empty string when there is no system message or its
        content is missing. A list-valued content is flattened by joining
        the ``text`` field of its dict items with newlines.
        """
        for m in msgs:
            if not isinstance(m, dict) or m.get("role") != "system":
                continue
            content = m.get("content")
            if isinstance(content, str):
                return content
            if isinstance(content, list):
                return "\n".join(
                    part["text"]
                    for part in content
                    if isinstance(part, dict) and isinstance(part.get("text"), str)
                )
            return ""
        return ""

    @staticmethod
    def _extract_who(system):
        """Pull the salutation out of the system prompt text.

        Scans *system* line by line; for each line containing the marker
        ``称呼`` the text after the last colon is taken, so the last matching
        line wins. Returns the placeholder ``(未知)`` when no line matches.
        """
        who = "(未知)"
        # Normalise the full-width colon (U+FF1A) to ASCII so both
        # spellings of the "label: value" separator split the same way.
        for line in system.replace("\uff1a", ":").splitlines():
            if "称呼" in line:
                who = line.split(":")[-1].strip()
        return who

    def do_POST(self):
        """Answer a chat-completion request with a character-by-character SSE stream.

        Responds 400 when the Content-Length header or the JSON body cannot
        be parsed, or when the body is not a JSON object. Otherwise logs the
        extracted salutation to stderr and streams the reply. A client that
        disconnects mid-stream ends the response silently.
        """
        try:
            n = max(int(self.headers.get("Content-Length", 0)), 0)
            req = json.loads(self.rfile.read(n) or b"{}")
        except ValueError:
            # Covers a non-numeric Content-Length as well as malformed or
            # undecodable JSON (both decode errors subclass ValueError).
            req = None
        if not isinstance(req, dict):
            self.send_response(400)
            self.end_headers()
            return

        msgs = req.get("messages", [])
        if not isinstance(msgs, list):
            msgs = []
        # Extract the salutation from the system prompt to show that the
        # injected profile really reached the model layer.
        who = self._extract_who(self._system_text(msgs))
        reply = f"你好{who},这是来自【在线模型 mock】的真实流式回复——我已读取你的偏好与历史并据此作答。"
        sys.stderr.write(f"[mock-openai] POST /chat/completions 命中, 注入称呼={who}\n")
        sys.stderr.flush()

        self.send_response(200)
        self.send_header("Content-Type", "text/event-stream")
        self.end_headers()
        try:
            time.sleep(self.ttft_delay)
            for ch in reply:
                chunk = {"choices": [{"delta": {"content": ch}}]}
                self.wfile.write(f"data: {json.dumps(chunk, ensure_ascii=False)}\n\n".encode())
                # Flush per event so the client sees tokens as they are produced.
                self.wfile.flush()
                time.sleep(self.chunk_delay)
            self.wfile.write(b"data: [DONE]\n\n")
            self.wfile.flush()
        except (BrokenPipeError, ConnectionResetError):
            # The client went away mid-stream; nothing left to send.
            return
|
||
|
||
|
||
if __name__ == "__main__":
    # The optional first CLI argument is the TCP port; defaults to 11999.
    port = int(sys.argv[1]) if len(sys.argv) > 1 else 11999
    # The context manager closes the listening socket on exit, including
    # when the server is stopped with Ctrl-C.
    with HTTPServer(("127.0.0.1", port), H) as httpd:
        # Announce only after the bind succeeded, and flush so the banner
        # shows up immediately when stdout is redirected to a log file.
        print(f"[mock-openai] listening on :{port}", flush=True)
        try:
            httpd.serve_forever()
        except KeyboardInterrupt:
            # Ctrl-C is the normal way to stop the mock; exit without a traceback.
            pass
|