sundynix-agentix/sundynix-dispatcher/internal/eino/memory_extract.go

package eino

import (
	"context"
	"encoding/json"
	"fmt"
	"log"
	"strings"

	"github.com/sundynix/sundynix-dispatcher/internal/llm"
	"github.com/sundynix/sundynix-shared/contract"
)

// Pref 是从对话抽取出的一条长期偏好（key/value）。
type Pref struct {
	Key   string `json:"key"`
	Value string `json:"value"`
}

// extractMemory 写回阶段（异步、离开热路径）：从本轮对话用 LLM 抽取用户长期偏好，
// 与已有画像去重后经 memory_upsert 登记。模型/工具不可用或输入过短则跳过。
func (o *Orchestrator) extractMemory(ctx context.Context, uid, input, answer string) {
	if uid == "" || o.tools == nil || o.pool == nil || !o.pool.Ready() {
		return
	}
	if len([]rune(strings.TrimSpace(input))) < 2 || len([]rune(strings.TrimSpace(answer))) < 20 {
		return // 太短，不值得抽取
	}
	existing := parseProfile(o.fetchMemory(ctx, uid, ""))

	cctx, cancel := llmCtx(ctx)
	defer cancel()
	sys := "你从对话中提取【用户的长期稳定偏好或事实】（如称呼、语言、职业、专业领域、口味、常用工具、固定要求等），" +
		"忽略一次性的临时信息与你自己的话。"
	user := fmt.Sprintf("用户输入：%s\n助手回答：%s\n请抽取。只输出 JSON 数组 [{\"key\":\"偏好维度\",\"value\":\"值\"}]，"+
		"没有可抽取的就输出 []，不要任何多余文字。", truncate(input, 800), truncate(answer, 1200))
	txt, err := o.pool.Chat(cctx, []llm.ChatMessage{{Role: "system", Content: sys}, {Role: "user", Content: user}})
	if err != nil {
		log.Printf("[eino] (writeback) 偏好抽取失败 user=%s: %v", uid, err)
		return
	}
	fresh := filterNewPrefs(parsePrefs(txt), existing)
	for _, p := range fresh {
		o.upsertMemory(ctx, uid, p.Key, p.Value)
	}
	if len(fresh) > 0 {
		log.Printf("[eino] (writeback) 已登记 %d 条新偏好 user=%s", len(fresh), uid)
	}
}

// upsertMemory 经 mcp-go memory_upsert 工具登记一条偏好。
func (o *Orchestrator) upsertMemory(ctx context.Context, uid, key, value string) {
	cctx, cancel := context.WithTimeout(ctx, toolCallTimeout)
	defer cancel()
	if _, err := o.tools.CallTool(cctx, contract.ToolSubjectGo("memory_upsert"),
		&contract.ToolCall{Tool: "memory_upsert", Args: map[string]any{"user_id": uid, "key": key, "value": value}}); err != nil {
		log.Printf("[eino] memory_upsert 失败 %s=%s: %v", key, value, err)
	}
}

// parsePrefs 解析 LLM 抽取结果（容忍 ```json 围栏）为 []Pref，过滤空项。
func parsePrefs(txt string) []Pref {
	var ps []Pref
	if json.Unmarshal([]byte(stripFence(txt)), &ps) != nil {
		return nil
	}
	out := make([]Pref, 0, len(ps))
	for _, p := range ps {
		p.Key, p.Value = strings.TrimSpace(p.Key), strings.TrimSpace(p.Value)
		if p.Key != "" && p.Value != "" {
			out = append(out, p)
		}
	}
	return out
}

// parseProfile 把 memory_get 渲染的画像（"- 维度：值" 多行）解析回 map，供去重。
func parseProfile(s string) map[string]string {
	m := map[string]string{}
	for _, line := range strings.Split(s, "\n") {
		line = strings.TrimSpace(strings.TrimPrefix(strings.TrimSpace(line), "-"))
		if line == "" {
			continue
		}
		for _, sep := range []string{"：", ":"} { // 兼容全角/半角冒号
			if i := strings.Index(line, sep); i > 0 {
				k := strings.TrimSpace(line[:i])
				if k != "" {
					m[k] = strings.TrimSpace(line[i+len(sep):])
				}
				break
			}
		}
	}
	return m
}

// filterNewPrefs 保留新增或值有变化的偏好（同批同 key 去重；已有且相同则跳过）。
func filterNewPrefs(extracted []Pref, existing map[string]string) []Pref {
	out := make([]Pref, 0, len(extracted))
	seen := map[string]bool{}
	for _, p := range extracted {
		if seen[p.Key] {
			continue
		}
		seen[p.Key] = true
		if cur, ok := existing[p.Key]; ok && cur == p.Value {
			continue
		}
		out = append(out, p)
	}
	return out
}