#!/usr/bin/env python3
"""Minimal OpenAI-compatible mock server.

Lets the LLM Pool streaming path be exercised when no real API key is
available.

- GET  /models            -> 200 with a one-model list (for "test connection").
- POST /chat/completions  -> SSE stream whose reply echoes the form of address
  found in the injected system prompt, proving that the profile/memory really
  reached the model context.

Usage:
    python3 scripts/mock_openai.py 11999
"""

import json
import sys
import time
from http.server import BaseHTTPRequestHandler, HTTPServer


class H(BaseHTTPRequestHandler):
    """Request handler implementing the two mocked OpenAI endpoints."""

    # Pause before the first chunk: simulates time-to-first-token and gives an
    # SSE client time to subscribe (handy when capturing a demo).
    ttft_delay = 0.8
    # Pause after each streamed character.
    chunk_delay = 0.02

    def log_message(self, *a):
        """Suppress the default per-request access log."""

    def _send_json(self, code, payload) -> None:
        """Send *payload* as a JSON response with status *code*."""
        body = json.dumps(payload).encode()
        self.send_response(code)
        self.send_header("Content-Type", "application/json")
        self.send_header("Content-Length", str(len(body)))
        self.end_headers()
        self.wfile.write(body)

    @staticmethod
    def _system_text(msgs) -> str:
        """Return the text of the first system message in *msgs*, or ""."""
        if not isinstance(msgs, list):
            return ""
        for m in msgs:
            if not isinstance(m, dict) or m.get("role") != "system":
                continue
            content = m.get("content")
            if isinstance(content, str):
                return content
            if isinstance(content, list):
                # Content-parts form: [{"type": "text", "text": "..."}, ...]
                return "\n".join(
                    p["text"]
                    for p in content
                    if isinstance(p, dict) and isinstance(p.get("text"), str)
                )
            return ""
        return ""

    @staticmethod
    def _extract_who(system: str) -> str:
        """Return the form of address declared in the system prompt.

        Scans for lines containing "称呼" and takes the text after the last
        colon; the last matching line wins. Full-width colons (U+FF1A) are
        normalized to ASCII first so both spellings are recognized.
        """
        who = "(未知)"
        for line in system.replace("\uff1a", ":").splitlines():
            if "称呼" in line:
                who = line.split(":")[-1].strip()
        return who

    def do_GET(self):
        """Serve the model list on ``*/models``; anything else is a 404."""
        if self.path.endswith("/models"):
            self._send_json(200, {"object": "list", "data": [{"id": "mock-model"}]})
        else:
            self.send_response(404)
            self.end_headers()

    def do_POST(self):
        """Stream a canned chat completion as server-sent events.

        Responds 400 with a JSON error object when the request body is not a
        JSON object or the Content-Length header is not a number.
        """
        try:
            # Clamp at 0: read(-1) would block until the client closes.
            n = max(int(self.headers.get("Content-Length") or 0), 0)
            req = json.loads(self.rfile.read(n) or b"{}")
        except ValueError:  # also covers JSONDecodeError / UnicodeDecodeError
            req = None
        if not isinstance(req, dict):
            self._send_json(
                400,
                {"error": {"message": "request body must be a JSON object",
                           "type": "invalid_request_error"}},
            )
            return

        # Pull the form of address out of the system prompt to prove the
        # profile injection really reached the model layer.
        who = self._extract_who(self._system_text(req.get("messages", [])))
        reply = f"你好{who},这是来自【在线模型 mock】的真实流式回复——我已读取你的偏好与历史并据此作答。"
        sys.stderr.write(f"[mock-openai] POST /chat/completions 命中, 注入称呼={who}\n")
        sys.stderr.flush()

        try:
            self.send_response(200)
            self.send_header("Content-Type", "text/event-stream")
            self.end_headers()
            time.sleep(self.ttft_delay)
            for ch in reply:
                chunk = {"choices": [{"delta": {"content": ch}}]}
                data = json.dumps(chunk, ensure_ascii=False)
                self.wfile.write(f"data: {data}\n\n".encode("utf-8"))
                # Flush per chunk so the client sees tokens as they are produced.
                self.wfile.flush()
                time.sleep(self.chunk_delay)
            self.wfile.write(b"data: [DONE]\n\n")
            self.wfile.flush()
        except (BrokenPipeError, ConnectionResetError):
            # Client went away mid-stream (e.g. cancelled); nothing left to send.
            return


def main(argv=None) -> None:
    """Run the mock server on 127.0.0.1.

    The port is taken from the first argument and defaults to 11999.
    """
    args = sys.argv[1:] if argv is None else argv
    port = int(args[0]) if args else 11999
    server = HTTPServer(("127.0.0.1", port), H)
    # Announce only after a successful bind; flush so piped stdout shows it.
    print(f"[mock-openai] listening on :{port}", flush=True)
    try:
        server.serve_forever()
    except KeyboardInterrupt:
        pass
    finally:
        server.server_close()


if __name__ == "__main__":
    main()