From 9c19bb44f1a3e23fde2920e7a5b9e5de043435d5 Mon Sep 17 00:00:00 2001 From: Blizzard Date: Thu, 18 Jun 2026 12:47:49 +0800 Subject: [PATCH] =?UTF-8?q?feat(dispatcher):=20=E9=95=BF=E6=9C=9F=E5=81=8F?= =?UTF-8?q?=E5=A5=BD=E8=AE=B0=E5=BF=86=E6=8A=BD=E5=8F=96=EF=BC=88=E8=A1=A5?= =?UTF-8?q?=E5=85=A8=E8=AE=B0=E5=BF=86=E9=97=AD=E7=8E=AF=EF=BC=89?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit memorize 的 TODO 落地:写回阶段(异步、离热路径)从本轮对话用 LLM 抽取用户 长期稳定偏好 → 与已有画像去重 → memory_upsert 登记。 - extractMemory:模型/工具不可用或输入过短则跳过;复用 llmCtx 超时; 抽取 prompt 只取长期偏好、忽略一次性信息。 - 纯逻辑(可单测):parsePrefs(容忍 json 代码围栏)、parseProfile(把 memory_get 渲染的"- 维度:值"解析回 map,兼容全/半角冒号)、filterNewPrefs(新增/变更才留, 同批同 key 去重)。 - 单测覆盖三者;LLM 抽取调用沿用已验证的 pool.Chat 模式。 至此记忆闭环:召回(memory_get) + 历史写回 + 偏好自动抽取 全通。 Co-Authored-By: Claude Opus 4.8 --- .../internal/eino/memory_extract.go | 113 ++++++++++++++++++ .../internal/eino/memory_extract_test.go | 43 +++++++ .../internal/eino/orchestrator.go | 4 +- 3 files changed, 158 insertions(+), 2 deletions(-) create mode 100644 sundynix-dispatcher/internal/eino/memory_extract.go create mode 100644 sundynix-dispatcher/internal/eino/memory_extract_test.go diff --git a/sundynix-dispatcher/internal/eino/memory_extract.go b/sundynix-dispatcher/internal/eino/memory_extract.go new file mode 100644 index 0000000..ad1e6ae --- /dev/null +++ b/sundynix-dispatcher/internal/eino/memory_extract.go @@ -0,0 +1,113 @@ +package eino + +import ( + "context" + "encoding/json" + "fmt" + "log" + "strings" + + "github.com/sundynix/sundynix-dispatcher/internal/llm" + "github.com/sundynix/sundynix-shared/contract" +) + +// Pref 是从对话抽取出的一条长期偏好(key/value)。 +type Pref struct { + Key string `json:"key"` + Value string `json:"value"` +} + +// extractMemory 写回阶段(异步、离开热路径):从本轮对话用 LLM 抽取用户长期偏好, +// 与已有画像去重后经 memory_upsert 登记。模型/工具不可用或输入过短则跳过。 +func (o *Orchestrator) extractMemory(ctx context.Context, uid, input, answer string) { + if uid == "" || o.tools == nil || o.pool == nil || !o.pool.Ready() { + return + } + if len([]rune(strings.TrimSpace(input))) < 2 || len([]rune(strings.TrimSpace(answer))) < 20 { + return // 太短,不值得抽取 + } + existing := parseProfile(o.fetchMemory(ctx, uid, "")) + + cctx, cancel := llmCtx(ctx) + defer cancel() + sys := "你从对话中提取【用户的长期稳定偏好或事实】(如称呼、语言、职业、专业领域、口味、常用工具、固定要求等)," + + "忽略一次性的临时信息与你自己的话。" + user := fmt.Sprintf("用户输入:%s\n助手回答:%s\n请抽取。只输出 JSON 数组 [{\"key\":\"偏好维度\",\"value\":\"值\"}],"+ + "没有可抽取的就输出 [],不要任何多余文字。", truncate(input, 800), truncate(answer, 1200)) + txt, err := o.pool.Chat(cctx, []llm.ChatMessage{{Role: "system", Content: sys}, {Role: "user", Content: user}}) + if err != nil { + log.Printf("[eino] (writeback) 偏好抽取失败 user=%s: %v", uid, err) + return + } + fresh := filterNewPrefs(parsePrefs(txt), existing) + for _, p := range fresh { + o.upsertMemory(ctx, uid, p.Key, p.Value) + } + if len(fresh) > 0 { + log.Printf("[eino] (writeback) 已登记 %d 条新偏好 user=%s", len(fresh), uid) + } +} + +// upsertMemory 经 mcp-go memory_upsert 工具登记一条偏好。 +func (o *Orchestrator) upsertMemory(ctx context.Context, uid, key, value string) { + cctx, cancel := context.WithTimeout(ctx, toolCallTimeout) + defer cancel() + if _, err := o.tools.CallTool(cctx, contract.ToolSubjectGo("memory_upsert"), + &contract.ToolCall{Tool: "memory_upsert", Args: map[string]any{"user_id": uid, "key": key, "value": value}}); err != nil { + log.Printf("[eino] memory_upsert 失败 %s=%s: %v", key, value, err) + } +} + +// parsePrefs 解析 LLM 抽取结果(容忍 ```json 围栏)为 []Pref,过滤空项。 +func parsePrefs(txt string) []Pref { + var ps []Pref + if json.Unmarshal([]byte(stripFence(txt)), &ps) != nil { + return nil + } + out := make([]Pref, 0, len(ps)) + for _, p := range ps { + p.Key, p.Value = strings.TrimSpace(p.Key), strings.TrimSpace(p.Value) + if p.Key != "" && p.Value != "" { + out = append(out, p) + } + } + return out +} + +// parseProfile 把 memory_get 渲染的画像("- 维度:值" 多行)解析回 map,供去重。 +func parseProfile(s string) map[string]string { + m := map[string]string{} + for _, line := range strings.Split(s, "\n") { + line = strings.TrimSpace(strings.TrimPrefix(strings.TrimSpace(line), "-")) + if line == "" { + continue + } + for _, sep := range []string{":", ":"} { // 兼容全角/半角冒号 + if i := strings.Index(line, sep); i > 0 { + k := strings.TrimSpace(line[:i]) + if k != "" { + m[k] = strings.TrimSpace(line[i+len(sep):]) + } + break + } + } + } + return m +} + +// filterNewPrefs 保留新增或值有变化的偏好(同批同 key 去重;已有且相同则跳过)。 +func filterNewPrefs(extracted []Pref, existing map[string]string) []Pref { + out := make([]Pref, 0, len(extracted)) + seen := map[string]bool{} + for _, p := range extracted { + if seen[p.Key] { + continue + } + seen[p.Key] = true + if cur, ok := existing[p.Key]; ok && cur == p.Value { + continue + } + out = append(out, p) + } + return out +} diff --git a/sundynix-dispatcher/internal/eino/memory_extract_test.go b/sundynix-dispatcher/internal/eino/memory_extract_test.go new file mode 100644 index 0000000..9b6282f --- /dev/null +++ b/sundynix-dispatcher/internal/eino/memory_extract_test.go @@ -0,0 +1,43 @@ +package eino + +import "testing" + +func TestParsePrefs(t *testing.T) { + got := parsePrefs("```json\n[{\"key\":\"称呼\",\"value\":\"Dexter\"},{\"key\":\"语言\",\"value\":\"中文\"},{\"key\":\"\",\"value\":\"空\"}]\n```") + if len(got) != 2 { + t.Fatalf("应解析出 2 条(过滤空 key),got %d: %v", len(got), got) + } + if got[0].Key != "称呼" || got[0].Value != "Dexter" { + t.Errorf("解析错: %v", got[0]) + } + if parsePrefs("不是 JSON") != nil { + t.Error("非 JSON 应返回 nil") + } +} + +func TestParseProfile(t *testing.T) { + m := parseProfile("- 称呼:Dexter\n- 语言: 中文\n\n- 职业:律师") + if m["称呼"] != "Dexter" || m["语言"] != "中文" || m["职业"] != "律师" { + t.Errorf("画像解析错: %v", m) + } + if len(parseProfile("")) != 0 { + t.Error("空画像应得空 map") + } +} + +func TestFilterNewPrefs(t *testing.T) { + existing := map[string]string{"称呼": "Dexter", "语言": "中文"} + in := []Pref{ + {"称呼", "Dexter"}, // 已有且相同 → 跳 + {"语言", "英文"}, // 已有但变了 → 留 + {"职业", "律师"}, // 新 → 留 + {"职业", "工程师"}, // 同批重复 key → 跳(保留首个) + } + got := filterNewPrefs(in, existing) + if len(got) != 2 { + t.Fatalf("应剩 2 条(语言变更 + 新职业),got %d: %v", len(got), got) + } + if got[0].Key != "语言" || got[0].Value != "英文" || got[1].Key != "职业" || got[1].Value != "律师" { + t.Errorf("过滤结果不符: %v", got) + } +} diff --git a/sundynix-dispatcher/internal/eino/orchestrator.go b/sundynix-dispatcher/internal/eino/orchestrator.go index 774edb7..03b5bcb 100644 --- a/sundynix-dispatcher/internal/eino/orchestrator.go +++ b/sundynix-dispatcher/internal/eino/orchestrator.go @@ -172,8 +172,8 @@ func (o *Orchestrator) memorize(t *contract.Task, answer string) { log.Printf("[eino] (writeback) task %s 已落会话历史 session=%s", t.ID, sid) } if uid != "" { - log.Printf("[eino] (writeback) task %s 待抽取 user=%s 的新偏好记忆", t.ID, uid) - // TODO: 抽取 LLM → 去重/更新 → memory_upsert + // 从本轮对话抽取长期偏好 → 去重 → memory_upsert(离开热路径,已在 goroutine 内)。 + o.extractMemory(context.Background(), uid, dsl.Compile(t.Graph).Query, answer) } }