d623b8590e
混合检索从 2 路(向量+全文)升级为 3 路(+图谱)。入库时 LLM 抽实体/关系建 Neo4j 图,检索时图谱路(实体关联三元组)融进 RRF;UI 可视化图谱。 - mcp-go rag: chat.go(OpenAI 兼容非流式 chat 客户端,抽取用) + graph.go(neo4j-go-driver 连接 + LLM 抽三元组 + MERGE 实体/关系 + 图谱召回/全量三元组) + rag.go(Config 结构; graph+chat 路;Ingest 加 抽实体/写Neo4j 阶段;Search 三路 RRF 融合;SetChat 热更新) - mcp-go: Neo4j env(默认 neo4j://localhost:7687, neo4j/sundynix);订阅 chat 控制面配置 (复用 DeepSeek 做抽取);新工具 kb_graph(返回三元组) - gateway: GET /api/v1/kb/graph;frontend KbView 知识图谱面板(实体—关系→实体) - 验证: 全模块 build✓ + e2e PASS; live——入库'sundynix用Milvus...'→DeepSeek 抽 4 三元组 →Neo4j(8 实体);检索三路融合 向量=4 全文=2 图谱=1;浏览器图谱面板渲染 4 三元组 - 边界: 实体链接用 CONTAINS 朴素匹配(可升级 LLM 查询实体抽取);全文/图谱重启随入库重建 Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
224 lines
6.5 KiB
Go
224 lines
6.5 KiB
Go
package handler
|
||
|
||
import (
|
||
"context"
|
||
"crypto/rand"
|
||
"encoding/base64"
|
||
"encoding/hex"
|
||
"encoding/json"
|
||
"errors"
|
||
"io"
|
||
"net/http"
|
||
"path/filepath"
|
||
"strings"
|
||
"time"
|
||
|
||
"github.com/gin-gonic/gin"
|
||
|
||
"github.com/sundynix/sundynix-shared/contract"
|
||
)
|
||
|
||
// KbIngest: POST /api/v1/kb/ingest —— 文本入库(异步,返回 job_id;进度经 SSE 看)。
|
||
func (h *Handler) KbIngest(c *gin.Context) {
|
||
var body struct {
|
||
KB string `json:"kb"`
|
||
Text string `json:"text"`
|
||
}
|
||
if err := c.ShouldBindJSON(&body); err != nil || body.Text == "" {
|
||
c.JSON(http.StatusBadRequest, gin.H{"error": "text required"})
|
||
return
|
||
}
|
||
job := newJobID()
|
||
go h.runIngest(job, body.KB, "", nil, body.Text)
|
||
c.JSON(http.StatusAccepted, gin.H{"job_id": job})
|
||
}
|
||
|
||
// KbIngestFile: POST /api/v1/kb/ingest_file(multipart)—— 文件入库(异步,返回 job_id)。
|
||
// 流水线(解析→切块→向量化→写入)的进度经 sundynix.streams.<job_id> 回流,UI 用 SSE 看。
|
||
func (h *Handler) KbIngestFile(c *gin.Context) {
|
||
kb := c.PostForm("kb")
|
||
fh, err := c.FormFile("file")
|
||
if err != nil {
|
||
c.JSON(http.StatusBadRequest, gin.H{"error": "file required"})
|
||
return
|
||
}
|
||
f, err := fh.Open()
|
||
if err != nil {
|
||
c.JSON(http.StatusBadGateway, gin.H{"error": err.Error()})
|
||
return
|
||
}
|
||
defer f.Close()
|
||
data, err := io.ReadAll(f)
|
||
if err != nil {
|
||
c.JSON(http.StatusBadGateway, gin.H{"error": err.Error()})
|
||
return
|
||
}
|
||
job := newJobID()
|
||
go h.runIngest(job, kb, fh.Filename, data, "")
|
||
c.JSON(http.StatusAccepted, gin.H{"job_id": job, "file": fh.Filename})
|
||
}
|
||
|
||
// runIngest 后台跑入库流水线,逐阶段把进度发到 sundynix.streams.<job>。
|
||
// filename 非空表示文件入库(先经 mcp-py 解析);否则用 rawText。
|
||
func (h *Handler) runIngest(job, kb, filename string, data []byte, rawText string) {
|
||
ctx := context.Background()
|
||
emit := func(ev contract.IngestEvent) { _ = h.bus.PublishIngest(job, &ev) }
|
||
time.Sleep(400 * time.Millisecond) // 给 SSE 客户端订阅时间(core NATS 无缓冲)
|
||
|
||
text := rawText
|
||
if filename != "" {
|
||
emit(contract.IngestEvent{Stage: "解析", Msg: filename})
|
||
parsed, err := h.parseFile(ctx, filename, data)
|
||
if err != nil {
|
||
emit(contract.IngestEvent{Stage: "失败", Error: "解析失败: " + err.Error()})
|
||
_ = h.bus.CompleteStream(job)
|
||
return
|
||
}
|
||
emit(contract.IngestEvent{Stage: "解析完成", Msg: "解析出 " + itoa(len([]rune(parsed))) + " 字"})
|
||
text = parsed
|
||
}
|
||
|
||
// 调 mcp-go kb_ingest(带 job_id):它会发 切块/向量化/写入/完成 事件 + CompleteStream。
|
||
res, err := h.bus.CallTool(ctx, contract.ToolSubjectGo("kb_ingest"),
|
||
&contract.ToolCall{Tool: "kb_ingest", Args: map[string]any{"kb": kb, "text": text, "job_id": job}})
|
||
if err != nil || res == nil || !res.OK {
|
||
msg := "kb_ingest 失败"
|
||
if err != nil {
|
||
msg = err.Error()
|
||
} else if res != nil {
|
||
msg = res.Error
|
||
}
|
||
emit(contract.IngestEvent{Stage: "失败", Error: msg})
|
||
_ = h.bus.CompleteStream(job)
|
||
}
|
||
}
|
||
|
||
// KbIngestStream: GET /api/v1/kb/ingest/:id/stream —— SSE 实时推送入库进度事件。
|
||
func (h *Handler) KbIngestStream(c *gin.Context) {
|
||
job := c.Param("id")
|
||
c.Writer.Header().Set("Content-Type", "text/event-stream")
|
||
c.Writer.Header().Set("Cache-Control", "no-cache")
|
||
c.Writer.Header().Set("Connection", "keep-alive")
|
||
|
||
events := make(chan []byte, 64)
|
||
done := make(chan struct{})
|
||
unsub, err := h.bus.SubscribeTokens(job,
|
||
func(ev []byte) {
|
||
select {
|
||
case events <- ev:
|
||
default:
|
||
}
|
||
},
|
||
func() { close(done) },
|
||
)
|
||
if err != nil {
|
||
c.JSON(http.StatusBadGateway, gin.H{"error": err.Error()})
|
||
return
|
||
}
|
||
defer func() { _ = unsub() }()
|
||
|
||
c.Stream(func(w io.Writer) bool {
|
||
select {
|
||
case ev := <-events:
|
||
c.SSEvent("progress", string(ev))
|
||
return true
|
||
case <-done:
|
||
c.SSEvent("done", job)
|
||
return false
|
||
case <-c.Request.Context().Done():
|
||
return false
|
||
}
|
||
})
|
||
}
|
||
|
||
// newJobID returns a fresh ingestion job identifier: the prefix "ingest_"
// followed by 16 lowercase hex digits (8 bytes).
//
// The bytes come from crypto/rand. If the random source reports an error the
// bytes are filled from the current time in nanoseconds instead, so that a
// failing source does not make every job share the same all-zero ID.
func newJobID() string {
	var raw [8]byte
	if _, err := rand.Read(raw[:]); err != nil {
		// Fallback: big-endian encoding of the wall-clock nanosecond counter.
		ts := uint64(time.Now().UnixNano())
		for i := len(raw) - 1; i >= 0; i-- {
			raw[i] = byte(ts)
			ts >>= 8
		}
	}
	return "ingest_" + hex.EncodeToString(raw[:])
}
|
||
|
||
// itoa converts n to its base-10 string form (a small local helper that
// avoids importing strconv).
//
// Digits are written right-to-left into a fixed buffer, so no intermediate
// slices are allocated. The magnitude is computed as an unsigned value, which
// keeps the minimum int correct: negating it as a signed int would overflow.
func itoa(n int) string {
	// 20 bytes hold the longest 64-bit result: "-9223372036854775808".
	var buf [20]byte
	pos := len(buf)

	mag := uint64(n)
	if n < 0 {
		// Unsigned negation wraps, yielding |n| even for the minimum int.
		mag = -mag
	}

	// Emit at least one digit so that 0 renders as "0".
	for {
		pos--
		buf[pos] = byte('0' + mag%10)
		mag /= 10
		if mag == 0 {
			break
		}
	}
	if n < 0 {
		pos--
		buf[pos] = '-'
	}
	return string(buf[pos:])
}
|
||
|
||
// parseFile 把文件字节转为纯文本:文本类直读,其余经 mcp-py parse_document(算法层)。
|
||
func (h *Handler) parseFile(ctx context.Context, filename string, data []byte) (string, error) {
|
||
switch strings.ToLower(filepath.Ext(filename)) {
|
||
case ".txt", ".md", ".markdown", ".text":
|
||
return string(data), nil
|
||
}
|
||
res, err := h.bus.CallTool(ctx, contract.ToolSubjectPy("parse_document"),
|
||
&contract.ToolCall{Tool: "parse_document", Args: map[string]any{
|
||
"filename": filename, "content_b64": base64.StdEncoding.EncodeToString(data),
|
||
}})
|
||
if err != nil {
|
||
return "", err
|
||
}
|
||
if res == nil || !res.OK {
|
||
if res != nil && res.Error != "" {
|
||
return "", errors.New(res.Error)
|
||
}
|
||
return "", errors.New("parse_document 无响应(mcp-py 未运行?)")
|
||
}
|
||
return res.Content, nil
|
||
}
|
||
|
||
// KbSearch: POST /api/v1/kb/search —— 检索台:查某知识库,返回带分数的命中(→ mcp-go kb_search)。
|
||
func (h *Handler) KbSearch(c *gin.Context) {
|
||
var body struct {
|
||
KB string `json:"kb"`
|
||
Q string `json:"q"`
|
||
TopK int `json:"topK"`
|
||
}
|
||
if err := c.ShouldBindJSON(&body); err != nil || body.Q == "" {
|
||
c.JSON(http.StatusBadRequest, gin.H{"error": "q required"})
|
||
return
|
||
}
|
||
args := map[string]any{"kb": body.KB, "q": body.Q}
|
||
if body.TopK > 0 {
|
||
args["topK"] = body.TopK
|
||
}
|
||
res, err := h.bus.CallTool(c.Request.Context(), contract.ToolSubjectGo("kb_search"),
|
||
&contract.ToolCall{Tool: "kb_search", Args: args})
|
||
if err != nil {
|
||
c.JSON(http.StatusBadGateway, gin.H{"error": err.Error()})
|
||
return
|
||
}
|
||
if !res.OK {
|
||
c.JSON(http.StatusUnprocessableEntity, gin.H{"error": res.Error})
|
||
return
|
||
}
|
||
var hits []map[string]any
|
||
_ = json.Unmarshal([]byte(res.Content), &hits)
|
||
c.JSON(http.StatusOK, gin.H{"hits": hits})
|
||
}
|
||
|
||
// KbGraph: GET /api/v1/kb/graph?kb= —— 某知识库的图谱三元组(→ mcp-go kb_graph,Neo4j)。
|
||
func (h *Handler) KbGraph(c *gin.Context) {
|
||
res, err := h.bus.CallTool(c.Request.Context(), contract.ToolSubjectGo("kb_graph"),
|
||
&contract.ToolCall{Tool: "kb_graph", Args: map[string]any{"kb": c.Query("kb"), "limit": 100}})
|
||
if err != nil {
|
||
c.JSON(http.StatusBadGateway, gin.H{"error": err.Error()})
|
||
return
|
||
}
|
||
var triples []map[string]any
|
||
_ = json.Unmarshal([]byte(res.Content), &triples)
|
||
c.JSON(http.StatusOK, gin.H{"triples": triples})
|
||
}
|