feat(kb): 笔记可编辑(按 doc 替换重索引)+ 笔记关系图([[双链]])
Obsidian 化继续:笔记能编辑/新建,文档间 [[双链]] 连成可点关系图。
按 doc 重索引(编辑不重复累积):
- Milvus 加 doc 字段(旧 schema 自动重建);insert 带 doc;deleteDoc(kb,doc) 重入库前清旧块。
- Bleve 索引 id 含 doc + deleteDoc 按 kb+doc 清旧块。
- rag.Ingest(kb, doc, text):写入前按 doc 删旧块再写(Neo4j MERGE 仍幂等,附加式)。
- kb_ingest 工具加 doc 参数;gateway runIngest 把 doc 透传,forceDoc 支持编辑保持笔记名稳定。
编辑/新建:
- gateway POST /kb/note {kb,name,content}:落库 + 以 name 为 doc 重入库(替换旧块,搜索/图谱同步)。
- 前端 VaultPanel:阅读/编辑切换(textarea 预填原文,保存调 saveNote)、新建笔记、乐观更新。
笔记关系图:
- GraphView 加 onNode(节点可点);VaultPanel 阅读/关系图切换,关系图 = 文档间 [[双链]] 三元组
力导向(点节点跳转该笔记)。
验证:curl 编辑 笔记B → 检索只返编辑后内容(旧块已清,不重复)。Preview:关系图渲染
笔记B—链接→项目A概述/模块X 且节点可点;编辑器预填原文可改可存。tsc+vite+后端 build 通过;重建 .app。
注:Milvus 加 doc 字段会触发集合重建(旧向量丢,文库原文在 PG 可重灌);Neo4j 图谱按附加式合并,
编辑删除的实体不会自动消失(图谱倾向增长)。
Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
@@ -94,7 +94,8 @@ function nodeColor(deg: number): { fill: string; text: string } {
|
||||
}
|
||||
|
||||
// GraphView 把知识三元组渲染为力导向图(实体=节点,关系=带标签的边),hover 高亮邻域。
|
||||
export function GraphView({ triples, height = 360 }: { triples: Triple[]; height?: number }) {
|
||||
// onNode 非空时节点可点(用于笔记关系图点节点开笔记)。
|
||||
export function GraphView({ triples, height = 360, onNode }: { triples: Triple[]; height?: number; onNode?: (id: string) => void }) {
|
||||
const W = 560;
|
||||
const H = height;
|
||||
const [hover, setHover] = useState<string | null>(null);
|
||||
@@ -148,6 +149,7 @@ export function GraphView({ triples, height = 360 }: { triples: Triple[]; height
|
||||
opacity={on ? 1 : 0.2}
|
||||
onMouseEnter={() => setHover(n.id)}
|
||||
onMouseLeave={() => setHover(null)}
|
||||
onClick={() => onNode?.(n.id)}
|
||||
style={{ cursor: "pointer" }}
|
||||
>
|
||||
<circle cx={n.x} cy={n.y} r={r} fill={c.fill} stroke={hover === n.id ? "#fff" : "#0b0d12"} strokeWidth={hover === n.id ? 2 : 1.5} />
|
||||
|
||||
@@ -134,6 +134,17 @@ export async function listVault(id: Identity, kb: string): Promise<VaultDoc[]> {
|
||||
return data.docs ?? [];
|
||||
}
|
||||
|
||||
/**
 * saveNote: POST /api/v1/kb/note — create or edit a note.
 *
 * The gateway persists the note and re-ingests it using `name` as the doc key,
 * replacing the chunks previously indexed for that doc.
 *
 * @param id      caller identity, forwarded as request headers via `idHeaders`
 * @param kb      knowledge-base name
 * @param name    note name (also used as the doc key)
 * @param content full note body
 * @throws Error when the response is not OK or carries no `name`; the message is the
 *         server-provided `error` if present, otherwise `save failed: <status>`.
 */
export async function saveNote(id: Identity, kb: string, name: string, content: string): Promise<void> {
  const res = await fetch(`${GATEWAY}/api/v1/kb/note`, {
    method: "POST",
    headers: { "Content-Type": "application/json", ...idHeaders(id) },
    body: JSON.stringify({ kb, name, content }),
  });

  // The body is not guaranteed to be a JSON object (e.g. an HTML error page from a proxy,
  // or a literal `null`). Parse defensively so the caller gets the status-based message
  // instead of an opaque SyntaxError/TypeError.
  let data: { name?: string; error?: string } = {};
  try {
    const parsed: unknown = await res.json();
    if (parsed !== null && typeof parsed === "object") {
      data = parsed as { name?: string; error?: string };
    }
  } catch {
    // Non-JSON body: keep `data` empty and fall through to the status check below.
  }

  if (!res.ok || !data.name) throw new Error(data.error ?? `save failed: ${res.status}`);
}
|
||||
|
||||
// ingestKb: POST /api/v1/kb/ingest —— 文本入库(异步,返回 job_id)。
|
||||
export async function ingestKb(id: Identity, kb: string, text: string): Promise<string> {
|
||||
const res = await fetch(`${GATEWAY}/api/v1/kb/ingest`, {
|
||||
|
||||
@@ -19,6 +19,10 @@ import {
|
||||
BookOpen,
|
||||
Link2,
|
||||
RefreshCw,
|
||||
Pencil,
|
||||
Save,
|
||||
X,
|
||||
Waypoints,
|
||||
type LucideIcon,
|
||||
} from "lucide-react";
|
||||
import {
|
||||
@@ -30,6 +34,7 @@ import {
|
||||
listKb,
|
||||
createKb,
|
||||
listVault,
|
||||
saveNote,
|
||||
type IngestEvent,
|
||||
type KbHit,
|
||||
type Triple,
|
||||
@@ -430,11 +435,27 @@ function escapeReg(s: string): string {
|
||||
return s.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
|
||||
}
|
||||
|
||||
// VaultPanel:Obsidian 式文库 —— 左文档列表 / 右 Markdown 笔记([[双链]]可点)+ 反向链接。
|
||||
/**
 * wikiLinks extracts every `[[name]]` / `[[name|alias]]` target from `content`.
 *
 * The alias part (after `|`) is ignored and each name is trimmed. Targets that are
 * empty after trimming (e.g. `[[ ]]`) are skipped so callers never see an empty name.
 * Duplicates are kept, in order of appearance.
 */
function wikiLinks(content: string): string[] {
  const names: string[] = [];
  for (const match of content.matchAll(/\[\[([^\]|]+)(\|[^\]]*)?\]\]/g)) {
    const target = (match[1] ?? "").trim();
    if (target !== "") names.push(target);
  }
  return names;
}
|
||||
|
||||
// VaultPanel:Obsidian 式文库 —— 文档列表 / Markdown 阅读+编辑([[双链]]可点)/ 反向链接 / 笔记关系图。
|
||||
function VaultPanel({ identity, kb }: { identity: Identity; kb: string }) {
|
||||
const toast = useToast();
|
||||
const [docs, setDocs] = useState<VaultDoc[]>([]);
|
||||
const [sel, setSel] = useState<string | null>(null);
|
||||
const [loading, setLoading] = useState(false);
|
||||
const [mode, setMode] = useState<"read" | "graph">("read");
|
||||
const [editing, setEditing] = useState(false);
|
||||
const [creatingNew, setCreatingNew] = useState(false);
|
||||
const [draft, setDraft] = useState("");
|
||||
const [draftName, setDraftName] = useState("");
|
||||
const [saving, setSaving] = useState(false);
|
||||
|
||||
const load = useCallback(async () => {
|
||||
setLoading(true);
|
||||
@@ -450,26 +471,77 @@ function VaultPanel({ identity, kb }: { identity: Identity; kb: string }) {
|
||||
}, [identity, kb]);
|
||||
useEffect(() => {
|
||||
void load();
|
||||
setEditing(false);
|
||||
setMode("read");
|
||||
}, [load]);
|
||||
|
||||
const names = new Set(docs.map((d) => d.name));
|
||||
const current = docs.find((d) => d.name === sel);
|
||||
const open = (name: string) => {
|
||||
if (names.has(name)) setSel(name);
|
||||
if (names.has(name)) {
|
||||
setSel(name);
|
||||
setMode("read");
|
||||
setEditing(false);
|
||||
}
|
||||
};
|
||||
const backlinks = current
|
||||
? docs.filter((d) => d.name !== current.name && new RegExp(`\\[\\[\\s*${escapeReg(current.name)}(\\|[^\\]]*)?\\s*\\]\\]`).test(d.content))
|
||||
: [];
|
||||
|
||||
if (!loading && docs.length === 0) {
|
||||
return <EmptyState icon={BookOpen} title="文库为空" desc={`「${kb}」还没有文档。到「入库」拖入文件或写笔记(支持 [[双链]])。`} />;
|
||||
// 笔记关系图:文档间 [[双链]] → 三元组(仅保留指向已存在笔记的边)。
|
||||
const noteTriples: Triple[] = [];
|
||||
for (const d of docs) {
|
||||
for (const link of wikiLinks(d.content)) {
|
||||
if (names.has(link) && link !== d.name) noteTriples.push({ s: d.name, p: "链接", o: link });
|
||||
}
|
||||
}
|
||||
|
||||
// startNew opens the editor with an empty draft for a brand-new note.
// It also switches to read mode: the render chain checks `mode === "graph"` before
// `editing`, so without this the editor would stay hidden behind the graph view
// while the Save/Cancel buttons are already showing.
const startNew = () => {
  setMode("read");
  setCreatingNew(true);
  setEditing(true);
  setDraftName("");
  setDraft("");
};
|
||||
const startEdit = () => {
|
||||
if (!current) return;
|
||||
setCreatingNew(false);
|
||||
setEditing(true);
|
||||
setDraftName(current.name);
|
||||
setDraft(current.content);
|
||||
};
|
||||
// onSave validates the draft, sends it to the gateway via saveNote, then updates the
// local list optimistically and schedules a background reload.
// For a new note the name comes from `draftName`; for an edit it is the currently
// selected note's name, so editing never renames a note.
const onSave = async () => {
  const name = (creatingNew ? draftName : current?.name ?? "").trim();
  if (!name || !draft.trim()) {
    toast.push("error", "笔记名与内容不能为空");
    return;
  }
  setSaving(true);
  try {
    await saveNote(identity, kb, name, draft);
    toast.push("success", `已保存「${name}」(正在重建索引)`);
    setEditing(false);
    setCreatingNew(false);
    // Optimistic update: replace an existing note in place (keeps its position in the
    // list and any other fields it carries); append only when the note is new.
    setDocs((ds) =>
      ds.some((x) => x.name === name)
        ? ds.map((x) => (x.name === name ? { ...x, content: draft } : x))
        : [...ds, { name, content: draft }],
    );
    setSel(name);
    // Refresh from the server shortly after, in the background.
    setTimeout(() => void load(), 300);
  } catch (e) {
    // Anything can be thrown; only read `.message` from real Error instances.
    toast.push("error", e instanceof Error ? e.message : String(e));
  } finally {
    setSaving(false);
  }
};
|
||||
|
||||
const empty = !loading && docs.length === 0;
|
||||
|
||||
return (
|
||||
<div className="flex h-full min-h-0">
|
||||
<aside className="flex w-56 shrink-0 flex-col overflow-y-auto border-r border-line p-2">
|
||||
<div className="mb-1 flex items-center justify-between px-1 text-[11px] text-slate-500">
|
||||
<div className="mb-1 flex items-center gap-1 px-1 text-[11px] text-slate-500">
|
||||
<span>文档 {docs.length}</span>
|
||||
<button onClick={startNew} className="ml-auto text-slate-500 hover:text-brand-400" title="新建笔记">
|
||||
<Plus className="h-3.5 w-3.5" />
|
||||
</button>
|
||||
<button onClick={load} className="text-slate-600 hover:text-slate-300" title="刷新">
|
||||
<RefreshCw className={cn("h-3.5 w-3.5", loading && "animate-spin")} />
|
||||
</button>
|
||||
@@ -478,7 +550,7 @@ function VaultPanel({ identity, kb }: { identity: Identity; kb: string }) {
|
||||
{docs.map((d) => (
|
||||
<li key={d.name}>
|
||||
<button
|
||||
onClick={() => setSel(d.name)}
|
||||
onClick={() => open(d.name)}
|
||||
className={cn("flex w-full items-center gap-1.5 rounded px-2 py-1.5 text-left text-xs", d.name === sel ? "bg-brand/15 text-brand-400" : "text-slate-300 hover:bg-ink-800")}
|
||||
>
|
||||
<FileText className="h-3.5 w-3.5 shrink-0" />
|
||||
@@ -488,8 +560,51 @@ function VaultPanel({ identity, kb }: { identity: Identity; kb: string }) {
|
||||
))}
|
||||
</ul>
|
||||
</aside>
|
||||
|
||||
<div className="min-h-0 flex-1 overflow-y-auto p-5">
|
||||
{current ? (
|
||||
{/* 工具条:阅读/关系图 + 编辑 */}
|
||||
<div className="mb-3 flex items-center gap-2">
|
||||
<div className="flex rounded-md border border-line p-0.5 text-[11px]">
|
||||
<button onClick={() => setMode("read")} className={cn("flex items-center gap-1 rounded px-2 py-1", mode === "read" ? "bg-brand/15 text-brand-400" : "text-slate-400")}>
|
||||
<BookOpen className="h-3 w-3" /> 阅读
|
||||
</button>
|
||||
<button onClick={() => setMode("graph")} className={cn("flex items-center gap-1 rounded px-2 py-1", mode === "graph" ? "bg-brand/15 text-brand-400" : "text-slate-400")}>
|
||||
<Waypoints className="h-3 w-3" /> 关系图
|
||||
</button>
|
||||
</div>
|
||||
{mode === "read" && !editing && current && (
|
||||
<Button size="sm" variant="ghost" icon={Pencil} onClick={startEdit}>
|
||||
编辑
|
||||
</Button>
|
||||
)}
|
||||
{editing && (
|
||||
<>
|
||||
<Button size="sm" variant="primary" icon={Save} onClick={onSave} disabled={saving}>
|
||||
{saving ? "保存中…" : "保存"}
|
||||
</Button>
|
||||
<Button size="sm" variant="ghost" icon={X} onClick={() => setEditing(false)}>
|
||||
取消
|
||||
</Button>
|
||||
</>
|
||||
)}
|
||||
<span className="ml-auto text-[10px] text-slate-600">笔记支持 [[双链]];保存后重建索引与图谱</span>
|
||||
</div>
|
||||
|
||||
{mode === "graph" ? (
|
||||
noteTriples.length > 0 ? (
|
||||
<GraphView triples={noteTriples} height={440} onNode={open} />
|
||||
) : (
|
||||
<EmptyState icon={Waypoints} title="暂无笔记关系" desc="在笔记里用 [[其它笔记名]] 互相引用,这里会连成关系图(点节点跳转)。" />
|
||||
)
|
||||
) : editing ? (
|
||||
<div className="flex flex-col gap-2">
|
||||
{creatingNew && <Input value={draftName} onChange={(e) => setDraftName(e.target.value)} placeholder="笔记名,如 项目A概述" autoFocus />}
|
||||
{!creatingNew && <div className="text-sm font-semibold text-slate-100">{draftName}</div>}
|
||||
<Textarea className="min-h-[360px] w-full font-mono" value={draft} onChange={(e) => setDraft(e.target.value)} placeholder={"# 标题\n正文支持 Markdown 与 [[双链]]…"} />
|
||||
</div>
|
||||
) : empty ? (
|
||||
<EmptyState icon={BookOpen} title="文库为空" desc={`「${kb}」还没有文档。点左上 + 新建笔记,或到「入库」拖入文件。`} action={<Button size="sm" icon={Plus} onClick={startNew}>新建笔记</Button>} />
|
||||
) : current ? (
|
||||
<>
|
||||
<h2 className="mb-3 flex items-center gap-2 text-base font-semibold text-slate-100">
|
||||
<FileText className="h-4 w-4 text-brand-400" />
|
||||
@@ -506,7 +621,7 @@ function VaultPanel({ identity, kb }: { identity: Identity; kb: string }) {
|
||||
<ul className="space-y-1">
|
||||
{backlinks.map((b) => (
|
||||
<li key={b.name}>
|
||||
<button onClick={() => setSel(b.name)} className="text-xs text-brand-400 hover:underline">
|
||||
<button onClick={() => open(b.name)} className="text-xs text-brand-400 hover:underline">
|
||||
{b.name}
|
||||
</button>
|
||||
</li>
|
||||
@@ -516,7 +631,7 @@ function VaultPanel({ identity, kb }: { identity: Identity; kb: string }) {
|
||||
</div>
|
||||
</>
|
||||
) : (
|
||||
<div className="text-sm text-slate-600">选择左侧文档查看。</div>
|
||||
<div className="text-sm text-slate-600">选择左侧文档查看,或新建笔记。</div>
|
||||
)}
|
||||
</div>
|
||||
</div>
|
||||
|
||||
@@ -77,10 +77,30 @@ func (h *Handler) KbIngest(c *gin.Context) {
|
||||
}
|
||||
_ = h.db.EnsureKB(c.Request.Context(), userID(c), rawKB(body.KB), "general")
|
||||
job := newJobID()
|
||||
go h.runIngest(job, userID(c), rawKB(body.KB), scopedKB(c, body.KB), "", nil, body.Text)
|
||||
go h.runIngest(job, userID(c), rawKB(body.KB), scopedKB(c, body.KB), "", "", nil, body.Text)
|
||||
c.JSON(http.StatusAccepted, gin.H{"job_id": job})
|
||||
}
|
||||
|
||||
// KbSaveNote: POST /api/v1/kb/note {kb, name, content} —— 新建/编辑笔记。
|
||||
// 立即落库(文库可见),并以 name 为 doc 重新入库(替换旧块,搜索/图谱同步)。返回 job_id。
|
||||
func (h *Handler) KbSaveNote(c *gin.Context) {
|
||||
var body struct {
|
||||
KB string `json:"kb"`
|
||||
Name string `json:"name"`
|
||||
Content string `json:"content"`
|
||||
}
|
||||
if err := c.ShouldBindJSON(&body); err != nil || strings.TrimSpace(body.Name) == "" || strings.TrimSpace(body.Content) == "" {
|
||||
c.JSON(http.StatusBadRequest, gin.H{"error": "name/content required"})
|
||||
return
|
||||
}
|
||||
owner := userID(c)
|
||||
_ = h.db.EnsureKB(c.Request.Context(), owner, rawKB(body.KB), "general")
|
||||
_ = h.db.SaveDoc(c.Request.Context(), owner, rawKB(body.KB), body.Name, body.Content)
|
||||
job := newJobID()
|
||||
go h.runIngest(job, owner, rawKB(body.KB), scopedKB(c, body.KB), body.Name, "", nil, body.Content)
|
||||
c.JSON(http.StatusAccepted, gin.H{"job_id": job, "name": body.Name})
|
||||
}
|
||||
|
||||
// KbVault: GET /api/v1/kb/vault?kb= —— 某知识库的全部原始文档(名+内容),供 Obsidian 式文库浏览。
|
||||
func (h *Handler) KbVault(c *gin.Context) {
|
||||
rows, err := h.db.ListVault(c.Request.Context(), userID(c), rawKB(c.Query("kb")))
|
||||
@@ -132,14 +152,15 @@ func (h *Handler) KbIngestFile(c *gin.Context) {
|
||||
}
|
||||
_ = h.db.EnsureKB(c.Request.Context(), userID(c), rawKB(kb), "general")
|
||||
job := newJobID()
|
||||
go h.runIngest(job, userID(c), rawKB(kb), scopedKB(c, kb), fh.Filename, data, "")
|
||||
go h.runIngest(job, userID(c), rawKB(kb), scopedKB(c, kb), "", fh.Filename, data, "")
|
||||
c.JSON(http.StatusAccepted, gin.H{"job_id": job, "file": fh.Filename})
|
||||
}
|
||||
|
||||
// runIngest 后台跑入库流水线,逐阶段把进度发到 sundynix.streams.<job>。
|
||||
// owner+kbName 用于"文库"原文留存;scoped 是 owner/kb 作向量/全文/图谱分区键。
|
||||
// forceDoc 非空时强制以它为文档名(笔记编辑用,保持笔记身份稳定)。
|
||||
// filename 非空表示文件入库(先经 mcp-py 解析);否则用 rawText。
|
||||
func (h *Handler) runIngest(job, owner, kbName, scoped, filename string, data []byte, rawText string) {
|
||||
func (h *Handler) runIngest(job, owner, kbName, scoped, forceDoc, filename string, data []byte, rawText string) {
|
||||
ctx := context.Background()
|
||||
emit := func(ev contract.IngestEvent) { _ = h.bus.PublishIngest(job, &ev) }
|
||||
time.Sleep(400 * time.Millisecond) // 给 SSE 客户端订阅时间(core NATS 无缓冲)
|
||||
@@ -161,8 +182,11 @@ func (h *Handler) runIngest(job, owner, kbName, scoped, filename string, data []
|
||||
text = parsed
|
||||
}
|
||||
|
||||
// 文库留存原文:文件用文件名,文本用首行作笔记名(best-effort,不阻断入库)。
|
||||
docName := filename
|
||||
// 文库留存原文:编辑指定名 > 文件名 > 文本首行。
|
||||
docName := forceDoc
|
||||
if docName == "" {
|
||||
docName = filename
|
||||
}
|
||||
if docName == "" {
|
||||
docName = noteName(text)
|
||||
}
|
||||
@@ -172,7 +196,7 @@ func (h *Handler) runIngest(job, owner, kbName, scoped, filename string, data []
|
||||
|
||||
// 调 mcp-go kb_ingest(带 job_id):它会发 切块/向量化/写入/完成 事件 + CompleteStream。
|
||||
res, err := h.bus.CallTool(ctx, contract.ToolSubjectGo("kb_ingest"),
|
||||
&contract.ToolCall{Tool: "kb_ingest", Args: map[string]any{"kb": scoped, "text": text, "job_id": job}})
|
||||
&contract.ToolCall{Tool: "kb_ingest", Args: map[string]any{"kb": scoped, "doc": docName, "text": text, "job_id": job}})
|
||||
if err != nil || res == nil || !res.OK {
|
||||
msg := "kb_ingest 失败"
|
||||
if err != nil {
|
||||
|
||||
@@ -31,6 +31,7 @@ func New(db *store.Postgres, cache *store.Redis, bus *nats.Bus) *gin.Engine {
|
||||
api.GET("/kb/ingest/:id/stream", h.KbIngestStream) // 入库进度 SSE(实时监控)
|
||||
api.POST("/kb/search", h.KbSearch) // 知识库检索台(→ mcp-go kb_search)
|
||||
api.GET("/kb/vault", h.KbVault) // 文库:原始文档浏览(Obsidian 式)
|
||||
api.POST("/kb/note", h.KbSaveNote) // 新建/编辑笔记(落库 + 按 doc 重入库)
|
||||
api.GET("/kb/graph", h.KbGraph) // 知识图谱三元组(→ mcp-go kb_graph,Neo4j)
|
||||
|
||||
api.POST("/reports", h.GenerateReport) // 报告生成(intent=report 任务 → Dispatcher 专用编排)
|
||||
|
||||
@@ -217,6 +217,7 @@ func (g *Gateway) reportRender(ctx context.Context, call *contract.ToolCall) *co
|
||||
// 带 job_id 时逐阶段把进度发到 sundynix.streams.<job_id>,供 UI 实时入库监控。
|
||||
func (g *Gateway) kbIngest(ctx context.Context, call *contract.ToolCall) *contract.ToolResult {
|
||||
kb, _ := call.Args["kb"].(string)
|
||||
doc, _ := call.Args["doc"].(string)
|
||||
text, _ := call.Args["text"].(string)
|
||||
jobID, _ := call.Args["job_id"].(string)
|
||||
if text == "" {
|
||||
@@ -230,7 +231,7 @@ func (g *Gateway) kbIngest(ctx context.Context, call *contract.ToolCall) *contra
|
||||
}
|
||||
}
|
||||
}
|
||||
n, err := g.rag.Ingest(ctx, kb, text, onProgress)
|
||||
n, err := g.rag.Ingest(ctx, kb, doc, text, onProgress)
|
||||
if jobID != "" {
|
||||
if err != nil {
|
||||
onProgress(contract.IngestEvent{Stage: "失败", Error: err.Error()})
|
||||
|
||||
@@ -26,21 +26,43 @@ func openBleve() *bleveStore {
|
||||
|
||||
func (b *bleveStore) ready() bool { return b != nil && b.idx != nil }
|
||||
|
||||
// index 把 (kb, texts) 写入全文索引(按 kb+文本哈希做幂等 ID)。
|
||||
func (b *bleveStore) index(kb string, texts []string) error {
|
||||
// index 把 (kb, doc, texts) 写入全文索引(id 含 kb+doc+文本哈希,幂等)。
|
||||
func (b *bleveStore) index(kb, doc string, texts []string) error {
|
||||
if !b.ready() {
|
||||
return nil
|
||||
}
|
||||
batch := b.idx.NewBatch()
|
||||
for _, t := range texts {
|
||||
id := fmt.Sprintf("%s:%x", kb, fnvHash(t))
|
||||
if err := batch.Index(id, map[string]any{"text": t, "kb": kb}); err != nil {
|
||||
id := fmt.Sprintf("%s:%s:%x", kb, doc, fnvHash(t))
|
||||
if err := batch.Index(id, map[string]any{"text": t, "kb": kb, "doc": doc}); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return b.idx.Batch(batch)
|
||||
}
|
||||
|
||||
// deleteDoc 删除某 (kb, doc) 的全部全文块(笔记重入库前清旧块)。
|
||||
func (b *bleveStore) deleteDoc(kb, doc string) {
|
||||
if !b.ready() || doc == "" {
|
||||
return
|
||||
}
|
||||
kq := bleve.NewTermQuery(kb)
|
||||
kq.SetField("kb")
|
||||
dq := bleve.NewTermQuery(doc)
|
||||
dq.SetField("doc")
|
||||
req := bleve.NewSearchRequest(bleve.NewConjunctionQuery(kq, dq))
|
||||
req.Size = 1000
|
||||
res, err := b.idx.Search(req)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
batch := b.idx.NewBatch()
|
||||
for _, h := range res.Hits {
|
||||
batch.Delete(h.ID)
|
||||
}
|
||||
_ = b.idx.Batch(batch)
|
||||
}
|
||||
|
||||
// search 全文检索(可按 kb 过滤),返回 BM25 排序的命中。
|
||||
func (b *bleveStore) search(kb, q string, topK int) []Hit {
|
||||
if !b.ready() || q == "" {
|
||||
|
||||
@@ -47,11 +47,12 @@ func (m *milvusStore) ensure(ctx context.Context, dim int) error {
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
// 已存集合维度不一致(如切换 embedding 模型)→ 重建。
|
||||
// 已存集合维度不一致(切 embedding 模型)或缺 doc 字段(旧 schema)→ 重建。
|
||||
if has {
|
||||
if coll, derr := m.cli.DescribeCollection(ctx, collection); derr == nil {
|
||||
if existing := vectorDim(coll); existing != 0 && existing != dim {
|
||||
log.Printf("[rag] 集合维度 %d≠%d,重建 %s", existing, dim, collection)
|
||||
dimBad := vectorDim(coll) != 0 && vectorDim(coll) != dim
|
||||
if dimBad || !hasField(coll, "doc") {
|
||||
log.Printf("[rag] 集合需重建(dim 变化或缺 doc 字段):%s", collection)
|
||||
if err := m.cli.DropCollection(ctx, collection); err != nil {
|
||||
return fmt.Errorf("drop collection: %w", err)
|
||||
}
|
||||
@@ -63,6 +64,7 @@ func (m *milvusStore) ensure(ctx context.Context, dim int) error {
|
||||
schema := entity.NewSchema().WithName(collection).WithDescription("sundynix wiki vectors").
|
||||
WithField(entity.NewField().WithName("id").WithDataType(entity.FieldTypeInt64).WithIsPrimaryKey(true).WithIsAutoID(true)).
|
||||
WithField(entity.NewField().WithName("kb").WithDataType(entity.FieldTypeVarChar).WithMaxLength(64)).
|
||||
WithField(entity.NewField().WithName("doc").WithDataType(entity.FieldTypeVarChar).WithMaxLength(200)).
|
||||
WithField(entity.NewField().WithName("text").WithDataType(entity.FieldTypeVarChar).WithMaxLength(8192)).
|
||||
WithField(entity.NewField().WithName("vector").WithDataType(entity.FieldTypeFloatVector).WithDim(int64(dim)))
|
||||
if err := m.cli.CreateCollection(ctx, schema, 1); err != nil {
|
||||
@@ -99,16 +101,32 @@ func isCollectionGone(err error) bool {
|
||||
strings.Contains(s, "collection not loaded")
|
||||
}
|
||||
|
||||
// insert 写入若干 (kb, text, vector)。
|
||||
// deleteDoc 删除某 (kb, doc) 的全部块 —— 笔记重新入库前先清旧块,避免重复累积。
|
||||
func (m *milvusStore) deleteDoc(ctx context.Context, kb, doc string, dim int) {
|
||||
if doc == "" {
|
||||
return
|
||||
}
|
||||
if err := m.ensure(ctx, dim); err != nil {
|
||||
return // 集合还没建 → 无旧块可删
|
||||
}
|
||||
expr := fmt.Sprintf("kb == %q && doc == %q", kb, doc)
|
||||
if err := m.cli.Delete(ctx, collection, "", expr); err != nil {
|
||||
log.Printf("[rag] 按 doc 删除旧块失败(忽略): %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// insert 写入若干 (kb, doc, text, vector)。
|
||||
// 若集合在运行期被丢失(如 Milvus 重启)→ 清缓存、重建集合后重试一次,避免必须重启进程才能恢复。
|
||||
func (m *milvusStore) insert(ctx context.Context, kb string, texts []string, vecs [][]float32) error {
|
||||
func (m *milvusStore) insert(ctx context.Context, kb, doc string, texts []string, vecs [][]float32) error {
|
||||
if len(vecs) == 0 {
|
||||
return nil
|
||||
}
|
||||
dim := len(vecs[0])
|
||||
kbs := make([]string, len(texts))
|
||||
docs := make([]string, len(texts))
|
||||
for i := range kbs {
|
||||
kbs[i] = kb
|
||||
docs[i] = doc
|
||||
}
|
||||
do := func() error {
|
||||
if err := m.ensure(ctx, dim); err != nil {
|
||||
@@ -116,6 +134,7 @@ func (m *milvusStore) insert(ctx context.Context, kb string, texts []string, vec
|
||||
}
|
||||
if _, err := m.cli.Insert(ctx, collection, "",
|
||||
entity.NewColumnVarChar("kb", kbs),
|
||||
entity.NewColumnVarChar("doc", docs),
|
||||
entity.NewColumnVarChar("text", texts),
|
||||
entity.NewColumnFloatVector("vector", dim, vecs),
|
||||
); err != nil {
|
||||
@@ -132,6 +151,19 @@ func (m *milvusStore) insert(ctx context.Context, kb string, texts []string, vec
|
||||
return err
|
||||
}
|
||||
|
||||
// hasField 判断集合 schema 是否含某字段。
|
||||
func hasField(coll *entity.Collection, name string) bool {
|
||||
if coll == nil || coll.Schema == nil {
|
||||
return false
|
||||
}
|
||||
for _, f := range coll.Schema.Fields {
|
||||
if f.Name == name {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// vectorDim 从集合 schema 读出向量字段维度(用于检测维度变化)。
|
||||
func vectorDim(coll *entity.Collection) int {
|
||||
if coll == nil || coll.Schema == nil {
|
||||
|
||||
@@ -114,8 +114,9 @@ func (e *Engine) Status() map[string]bool {
|
||||
}
|
||||
|
||||
// Ingest 把一段文本切块 → 分批向量化 → 写 Milvus + Bleve,返回块数。
|
||||
// doc 非空表示这是某篇文档/笔记(按 doc 先删旧块再写,支持编辑替换,不重复累积)。
|
||||
// onProgress 非空时逐阶段/逐批回调进度(用于实时入库监控)。
|
||||
func (e *Engine) Ingest(ctx context.Context, kb, text string, onProgress func(contract.IngestEvent)) (int, error) {
|
||||
func (e *Engine) Ingest(ctx context.Context, kb, doc, text string, onProgress func(contract.IngestEvent)) (int, error) {
|
||||
emit := func(ev contract.IngestEvent) {
|
||||
if onProgress != nil {
|
||||
onProgress(ev)
|
||||
@@ -144,12 +145,16 @@ func (e *Engine) Ingest(ctx context.Context, kb, text string, onProgress func(co
|
||||
}
|
||||
|
||||
emit(contract.IngestEvent{Stage: "写Milvus", Msg: "向量库写入中"})
|
||||
if err := e.mv.insert(ctx, kb, chunks, vecs); err != nil {
|
||||
if len(vecs) > 0 {
|
||||
e.mv.deleteDoc(ctx, kb, doc, len(vecs[0])) // 编辑/重入库:先清该 doc 旧块
|
||||
}
|
||||
if err := e.mv.insert(ctx, kb, doc, chunks, vecs); err != nil {
|
||||
emit(contract.IngestEvent{Stage: "失败", Error: "写Milvus: " + err.Error()})
|
||||
return 0, err
|
||||
}
|
||||
emit(contract.IngestEvent{Stage: "写Bleve", Msg: "全文索引写入中"})
|
||||
_ = e.bleve.index(kb, chunks) // 同步写全文索引(失败不阻断向量入库)
|
||||
e.bleve.deleteDoc(kb, doc)
|
||||
_ = e.bleve.index(kb, doc, chunks) // 同步写全文索引(失败不阻断向量入库)
|
||||
|
||||
// 图谱路:LLM 抽实体/关系 → Neo4j(可降级,不阻断向量入库)。
|
||||
if e.graph.ready() && e.chatClient().ready() {
|
||||
|
||||
Reference in New Issue
Block a user