feat: 第一张真实 Eino 图 + 偏好记忆（让模型知道是我）

dispatcher 不再手搓 pool.Stream，改用编译好的 Eino 图驱动；接入用户常驻画像，推理前召回并注入 system prompt，实现个性化（架构'心脏'首次真跳）。

Eino 图(dispatcher/internal/eino): START→recall→prompt→model→END + 全局 State
- recall(Lambda): 取 Meta[user_id] → 调 MCP memory_get → ProcessState 写画像
- prompt(ChatTemplate): {profile} 注入 system，{query} 作 user
- model: poolModel 适配 LLM Pool 为 model.BaseChatModel(Generate+Stream, schema.Pipe)
- 写回: 流排空后异步 memorize（流式节点走 OnEndWithStreamOutput 非 OnEndFn）

记忆存储(mcp-go owns): GORM Profile→sundynix_user_profile(复合主键, AutoMigrate, 遵守前缀约定), 新工具 memory_get/memory_upsert, 连不上降级

Gateway: SubmitTask 注入 Meta[user_id](X-User-ID 头), PUT /api/v1/memory→memory_upsert

shared: contract.MetaUserID; llm.Pool 拆出 StreamText

验证: 4 模块 build✓ + 3 e2e PASS; live 跑通——PUT 偏好落 sundynix_user_profile, 带 X-User-ID 提交→Eino recall 召回→注入→SSE 流出含画像的个性化回答, writeback 触发

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-10 14:06:18 +08:00
parent a67604f4b7
commit cbd130ecae
19 changed files with 638 additions and 57 deletions
@@ -23,15 +23,20 @@ const (
 // 真实接入 vLLM/Ollama 时替换为后端 streaming API 即可（回调签名不变）。
 func (p *Pool) Stream(ctx context.Context, prompt string, onToken func([]byte)) error {
 	// TODO: 选路 (least-load / 模型亲和) → 调 vLLM/Ollama streaming API
-	reply := buildReply(prompt)
+	return p.StreamText(ctx, buildReply(prompt), onToken)
+}

+// StreamText 按真实后端的 TTFT/逐 token 节奏把给定文本流式回调。
+// 把"说什么"(由上层/Eino 图决定)与"怎么流"(后端节奏)解耦：
+// 真实接入 vLLM/Ollama 后，由后端 streaming API 直接驱动，无需本方法。
+func (p *Pool) StreamText(ctx context.Context, text string, onToken func([]byte)) error {
 	select {
 	case <-ctx.Done():
 		return ctx.Err()
 	case <-time.After(timeToFirstToken): // 模拟 TTFT
 	}

-	for _, tok := range tokenize(reply) {
+	for _, tok := range tokenize(text) {
 		select {
 		case <-ctx.Done():
 			return ctx.Err()