d623b8590e
混合检索从 2 路(向量+全文)升级为 3 路(+图谱)。入库时 LLM 抽实体/关系建 Neo4j 图,检索时图谱路(实体关联三元组)融进 RRF;UI 可视化图谱。 - mcp-go rag: chat.go(OpenAI 兼容非流式 chat 客户端,抽取用) + graph.go(neo4j-go-driver 连接 + LLM 抽三元组 + MERGE 实体/关系 + 图谱召回/全量三元组) + rag.go(Config 结构; graph+chat 路;Ingest 加 抽实体/写Neo4j 阶段;Search 三路 RRF 融合;SetChat 热更新) - mcp-go: Neo4j env(默认 neo4j://localhost:7687, neo4j/sundynix);订阅 chat 控制面配置 (复用 DeepSeek 做抽取);新工具 kb_graph(返回三元组) - gateway: GET /api/v1/kb/graph;frontend KbView 知识图谱面板(实体—关系→实体) - 验证: 全模块 build✓ + e2e PASS; live——入库'sundynix用Milvus...'→DeepSeek 抽 4 三元组 →Neo4j(8 实体);检索三路融合 向量=4 全文=2 图谱=1;浏览器图谱面板渲染 4 三元组 - 边界: 实体链接用 CONTAINS 朴素匹配(可升级 LLM 查询实体抽取);全文/图谱重启随入库重建 Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
159 lines
4.6 KiB
Go
159 lines
4.6 KiB
Go
package rag
|
|
|
|
import (
|
|
"context"
|
|
"encoding/json"
|
|
"fmt"
|
|
"log"
|
|
"strings"
|
|
|
|
"github.com/neo4j/neo4j-go-driver/v5/neo4j"
|
|
"github.com/neo4j/neo4j-go-driver/v5/neo4j/auth"
|
|
)
|
|
|
|
// Triple is one knowledge-graph fact: a subject connected to an object by a
// predicate. The single-letter JSON keys are the wire format used when
// encoding or decoding a triple.
type Triple struct {
	// S is the subject entity.
	S string `json:"s"`
	// P is the predicate, i.e. the relation between S and O.
	P string `json:"p"`
	// O is the object entity.
	O string `json:"o"`
}
|
|
|
|
// graphStore 是 GraphRAG 的图路:实体/关系存 Neo4j。
|
|
type graphStore struct {
|
|
driver neo4j.DriverWithContext
|
|
}
|
|
|
|
func openGraph(ctx context.Context, uri, user, pass string) *graphStore {
|
|
if uri == "" {
|
|
return &graphStore{}
|
|
}
|
|
drv, err := neo4j.NewDriverWithContext(uri,
|
|
auth.BasicTokenManager(func(context.Context) (neo4j.AuthToken, error) {
|
|
return neo4j.BasicAuth(user, pass, ""), nil
|
|
}))
|
|
if err != nil {
|
|
log.Printf("[rag] Neo4j 连接失败,图谱路降级: %v", err)
|
|
return &graphStore{}
|
|
}
|
|
if err := drv.VerifyConnectivity(ctx); err != nil {
|
|
log.Printf("[rag] Neo4j 不可用,图谱路降级: %v", err)
|
|
return &graphStore{}
|
|
}
|
|
// 实体唯一约束(kb+name)。
|
|
_, _ = neo4j.ExecuteQuery(ctx, drv,
|
|
"CREATE CONSTRAINT entity_key IF NOT EXISTS FOR (e:Entity) REQUIRE (e.kb, e.name) IS UNIQUE",
|
|
nil, neo4j.EagerResultTransformer)
|
|
log.Printf("[rag] Neo4j connected %s", uri)
|
|
return &graphStore{driver: drv}
|
|
}
|
|
|
|
// ready reports whether the store holds a driver. It may be called on a nil
// receiver, in which case it returns false.
func (g *graphStore) ready() bool {
	if g == nil {
		return false
	}
	return g.driver != nil
}
|
|
|
|
func (g *graphStore) close(ctx context.Context) {
|
|
if g.ready() {
|
|
_ = g.driver.Close(ctx)
|
|
}
|
|
}
|
|
|
|
// store 把三元组 MERGE 进 Neo4j(实体 + 关系,按 kb 隔离)。
|
|
func (g *graphStore) store(ctx context.Context, kb string, triples []Triple) (int, error) {
|
|
if !g.ready() {
|
|
return 0, nil
|
|
}
|
|
n := 0
|
|
for _, t := range triples {
|
|
if t.S == "" || t.O == "" || t.P == "" {
|
|
continue
|
|
}
|
|
_, err := neo4j.ExecuteQuery(ctx, g.driver,
|
|
`MERGE (a:Entity {kb:$kb, name:$s})
|
|
MERGE (b:Entity {kb:$kb, name:$o})
|
|
MERGE (a)-[r:REL {type:$p}]->(b)`,
|
|
map[string]any{"kb": kb, "s": t.S, "o": t.O, "p": t.P},
|
|
neo4j.EagerResultTransformer, neo4j.ExecuteQueryWithDatabase("neo4j"))
|
|
if err != nil {
|
|
return n, err
|
|
}
|
|
n++
|
|
}
|
|
return n, nil
|
|
}
|
|
|
|
// search 图谱召回:找查询里提到的实体,返回其相连三元组(文本化)。
|
|
func (g *graphStore) search(ctx context.Context, kb, query string, limit int) []Hit {
|
|
if !g.ready() || query == "" {
|
|
return nil
|
|
}
|
|
res, err := neo4j.ExecuteQuery(ctx, g.driver,
|
|
`MATCH (a:Entity {kb:$kb})-[r:REL]->(b:Entity {kb:$kb})
|
|
WHERE $q CONTAINS a.name OR $q CONTAINS b.name
|
|
RETURN a.name AS s, r.type AS p, b.name AS o LIMIT $k`,
|
|
map[string]any{"kb": kb, "q": query, "k": limit},
|
|
neo4j.EagerResultTransformer, neo4j.ExecuteQueryWithDatabase("neo4j"))
|
|
if err != nil {
|
|
return nil
|
|
}
|
|
var hits []Hit
|
|
for _, rec := range res.Records {
|
|
s, _ := rec.Get("s")
|
|
p, _ := rec.Get("p")
|
|
o, _ := rec.Get("o")
|
|
hits = append(hits, Hit{Text: fmt.Sprintf("%v —%v→ %v", s, p, o), Score: 1})
|
|
}
|
|
return hits
|
|
}
|
|
|
|
// triples 返回某 kb 的全部三元组(供 UI 图谱可视化)。
|
|
func (g *graphStore) triples(ctx context.Context, kb string, limit int) []Triple {
|
|
if !g.ready() {
|
|
return nil
|
|
}
|
|
res, err := neo4j.ExecuteQuery(ctx, g.driver,
|
|
`MATCH (a:Entity {kb:$kb})-[r:REL]->(b:Entity {kb:$kb})
|
|
RETURN a.name AS s, r.type AS p, b.name AS o LIMIT $k`,
|
|
map[string]any{"kb": kb, "k": limit},
|
|
neo4j.EagerResultTransformer, neo4j.ExecuteQueryWithDatabase("neo4j"))
|
|
if err != nil {
|
|
return nil
|
|
}
|
|
var out []Triple
|
|
for _, rec := range res.Records {
|
|
s, _ := rec.Get("s")
|
|
p, _ := rec.Get("p")
|
|
o, _ := rec.Get("o")
|
|
out = append(out, Triple{S: fmt.Sprint(s), P: fmt.Sprint(p), O: fmt.Sprint(o)})
|
|
}
|
|
return out
|
|
}
|
|
|
|
// extractTriples 用 LLM 从文本抽取知识三元组。
|
|
func extractTriples(ctx context.Context, chat *chatClient, text string) ([]Triple, error) {
|
|
if !chat.ready() {
|
|
return nil, nil
|
|
}
|
|
const sys = "你是知识图谱抽取器。从用户文本中抽取知识三元组,输出 JSON 数组,每项形如 {\"s\":\"主体\",\"p\":\"关系\",\"o\":\"客体\"}。实体用简洁名词,关系用简短动词短语。只输出 JSON,不要任何解释或代码块标记。"
|
|
out, err := chat.complete(ctx, sys, text)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
return parseTriples(out), nil
|
|
}
|
|
|
|
// parseTriples 容忍代码块/前后噪声地解析三元组 JSON。
|
|
func parseTriples(s string) []Triple {
|
|
s = strings.TrimSpace(s)
|
|
s = strings.TrimPrefix(s, "```json")
|
|
s = strings.TrimPrefix(s, "```")
|
|
s = strings.TrimSuffix(s, "```")
|
|
if i := strings.Index(s, "["); i >= 0 {
|
|
if j := strings.LastIndex(s, "]"); j > i {
|
|
s = s[i : j+1]
|
|
}
|
|
}
|
|
var triples []Triple
|
|
if json.Unmarshal([]byte(s), &triples) != nil {
|
|
return nil
|
|
}
|
|
return triples
|
|
}
|