feat(kb): 入库可视化做厚 —— 文件解析/知识抽取过程 + 力导向知识图谱

把"进度条"升级成可观测的入库工作台,回应三点诉求:解析过程、知识抽取过程、丰富图谱。

- contract: IngestEvent 加 Preview(解析文本预览)+ Triples[]TripleView(抽出的三元组)。
- 后端回流:rag.Ingest 抽实体阶段把 LLM 抽出的三元组实时回流(边出现边渲染);
  gateway 解析完成回流文件类型 + 文本预览片段。
- 前端 GraphView.tsx:零依赖自建力导向布局(斥力+边弹簧+居中静态收敛),实体=节点
  按度着色(枢纽紫/关联青/叶子)、关系=带标签边、hover 高亮邻域、节点过多按度裁剪。
- 前端 KbView 重做:入库从"阶段徽标+进度条"→竖向时间线(解析预览/切块块/向量化进度/
  抽取知识三元组 chips + 实时小图谱逐步浮现);右侧知识图谱从扁平列表→GraphView,
  入库完成自动刷新整库图谱。

验证(Preview):入库一段多事实文本 → 时间线逐阶段点亮、抽出 17 条三元组实时浮现、
右侧力导向图渲染 sundynix-agentix/知识库 为枢纽 + 带标签关系边。tsc+vite+后端 build 通过。

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
Blizzard
2026-06-13 14:22:50 +08:00
parent d5dfb7a928
commit 72e008bfe8
6 changed files with 408 additions and 112 deletions
@@ -0,0 +1,169 @@
import { useMemo, useState } from "react";
import { Network } from "lucide-react";
import type { Triple } from "../lib/api";
import { EmptyState } from "../ui";
/** A positioned graph node: one entity referenced by the triples. */
interface GNode {
  /** Entity name; doubles as the node's unique key. */
  id: string;
  /** Degree: how many triple endpoints (subject or object) reference this entity. */
  deg: number;
  /** Horizontal position in layout coordinates. */
  x: number;
  /** Vertical position in layout coordinates. */
  y: number;
}
/** A directed, labelled edge derived from one triple. */
interface GEdge {
  /** Subject entity name (source node id). */
  s: string;
  /** Predicate; rendered as the edge label. */
  p: string;
  /** Object entity name (target node id). */
  o: string;
}
/**
 * Lays out knowledge triples as a graph using a lightweight force-directed
 * simulation (pairwise repulsion + edge springs + a gentle pull to the centre).
 *
 * The simulation runs a fixed number of iterations synchronously and starts from
 * a deterministic placement, so identical input yields an identical layout. It
 * uses no third-party dependency. When there are more than CAP entities, only the
 * highest-degree ones are kept.
 *
 * @param triples Subject/predicate/object triples; a triple with a falsy `s` or `o` is ignored.
 * @param W Width of the layout area (same units as the returned coordinates).
 * @param H Height of the layout area.
 * @returns The positioned nodes and the edges whose two endpoints were both kept.
 */
function layout(triples: Triple[], W: number, H: number): { nodes: GNode[]; edges: GEdge[] } {
// Degree of an entity = number of triple endpoints (subject or object) that name it.
const deg = new Map<string, number>();
for (const t of triples) {
if (!t.s || !t.o) continue;
deg.set(t.s, (deg.get(t.s) ?? 0) + 1);
deg.set(t.o, (deg.get(t.o) ?? 0) + 1);
}
// Cull: with too many entities keep only the CAP highest-degree ones, and keep
// an edge only when both of its endpoints are in the kept set.
let names = [...deg.keys()];
const CAP = 60;
if (names.length > CAP) {
names = names.sort((a, b) => (deg.get(b)! - deg.get(a)!)).slice(0, CAP);
}
const keep = new Set(names);
const edges = triples.filter((t) => keep.has(t.s) && keep.has(t.o)).map((t) => ({ s: t.s, o: t.o, p: t.p }));
const nodes = new Map<string, GNode>();
// Radius of the circle used for the initial placement.
const R = Math.min(W, H) * 0.36;
names.forEach((n, i) => {
const a = (2 * Math.PI * i) / names.length;
// Seed the nodes evenly on a circle (deterministic, so re-layouts do not jitter).
// Note: `deg` is the degree counted over all triples, including edges to culled entities.
nodes.set(n, { id: n, x: W / 2 + Math.cos(a) * R, y: H / 2 + Math.sin(a) * R, deg: deg.get(n)! });
});
const arr = [...nodes.values()];
// Fixed iteration count; positions are updated in place, so later forces in an
// iteration already see the displacements applied by earlier ones.
for (let it = 0; it < 320; it++) {
// Repulsion (Coulomb-like): every pair is pushed apart by 2600 / distance².
for (let i = 0; i < arr.length; i++) {
for (let j = i + 1; j < arr.length; j++) {
const a = arr[i],
b = arr[j];
let dx = a.x - b.x,
dy = a.y - b.y;
let d2 = dx * dx + dy * dy;
// (Nearly) coincident nodes: clamp the squared distance and force a +x
// component so the pair separates instead of dividing by ~0.
if (d2 < 1) {
d2 = 1;
dx = 1;
}
const d = Math.sqrt(d2);
const f = 2600 / d2;
a.x += (dx / d) * f;
a.y += (dy / d) * f;
b.x -= (dx / d) * f;
b.y -= (dy / d) * f;
}
}
// Edge springs (rest length ~96): endpoints attract when farther apart, repel when closer.
for (const e of edges) {
const a = nodes.get(e.s)!,
b = nodes.get(e.o)!;
const dx = b.x - a.x,
dy = b.y - a.y;
// `|| 1` avoids a division by zero when both endpoints share a position (e.g. a self-loop).
const d = Math.sqrt(dx * dx + dy * dy) || 1;
const f = (d - 96) * 0.012;
a.x += (dx / d) * f;
a.y += (dy / d) * f;
b.x -= (dx / d) * f;
b.y -= (dy / d) * f;
}
// Gentle pull toward the centre, then clamp inside the area
// (20-unit horizontal margin, 16-unit vertical margin).
for (const a of arr) {
a.x += (W / 2 - a.x) * 0.004;
a.y += (H / 2 - a.y) * 0.004;
a.x = Math.max(20, Math.min(W - 20, a.x));
a.y = Math.max(16, Math.min(H - 16, a.y));
}
}
return { nodes: arr, edges };
}
/**
 * Picks the fill and text colours for a node from its degree.
 *
 * @param deg Node degree.
 * @returns Hub colours for `deg >= 4`, secondary-hub colours for `deg >= 2`,
 *          and leaf colours for everything else.
 */
function nodeColor(deg: number): { fill: string; text: string } {
  // Start from the leaf palette and upgrade when a degree threshold is met.
  let fill = "#1a1f2d";
  let text = "#cbd5e1";
  if (deg >= 4) {
    // Hub: brand colour.
    fill = "#8b5cf6";
    text = "#ede9fe";
  } else if (deg >= 2) {
    // Secondary hub: accent colour.
    fill = "#22d3ee";
    text = "#083344";
  }
  return { fill, text };
}
/**
 * Renders knowledge triples as a force-directed graph inside an SVG
 * (entity = node, relation = labelled edge). Hovering a node highlights that
 * node, its direct neighbours and its incident edges, and dims everything else.
 *
 * @param triples Triples to draw; an empty array renders the empty state instead.
 * @param height Height used both for the SVG viewBox and its CSS height (default 360).
 */
export function GraphView({ triples, height = 360 }: { triples: Triple[]; height?: number }) {
// viewBox width is fixed; the <svg> itself is sized by CSS (w-full + style height).
const W = 560;
const H = height;
// Id of the node currently under the pointer, or null when nothing is hovered.
const [hover, setHover] = useState<string | null>(null);
// The layout is recomputed only when the triples array or the height changes.
const { nodes, edges } = useMemo(() => layout(triples, W, H), [triples, H]);
// Node lookup by id, used to resolve edge endpoints to coordinates.
const pos = useMemo(() => new Map(nodes.map((n) => [n.id, n])), [nodes]);
// The hooks above run on every render; the empty-state return comes after them.
if (triples.length === 0) {
return <EmptyState icon={Network} title="暂无图谱" desc="入库文本后,LLM 会抽取实体与关系,这里渲染为可交互的知识图谱。" />;
}
// Returns `id` together with every node that shares an edge with it.
const neighbors = (id: string) => {
const s = new Set<string>([id]);
for (const e of edges) {
if (e.s === id) s.add(e.o);
if (e.o === id) s.add(e.s);
}
return s;
};
// With no hover everything is "on"; otherwise only the hovered node's neighbourhood is.
const active = hover ? neighbors(hover) : null;
const nodeOn = (id: string) => !active || active.has(id);
const edgeOn = (e: GEdge) => !hover || e.s === hover || e.o === hover;
return (
<div className="flex flex-col gap-2">
<svg viewBox={`0 0 ${W} ${H}`} className="w-full rounded-md border border-line bg-ink-950/60" style={{ height }}>
{edges.map((e, i) => {
const a = pos.get(e.s)!,
b = pos.get(e.o)!;
// Skip an edge whose endpoint has no positioned node.
if (!a || !b) return null;
const on = edgeOn(e);
// The label sits at the edge midpoint and is drawn only for highlighted edges.
const mx = (a.x + b.x) / 2,
my = (a.y + b.y) / 2;
return (
<g key={i} opacity={on ? 1 : 0.12}>
<line x1={a.x} y1={a.y} x2={b.x} y2={b.y} stroke="#39435a" strokeWidth={1} />
{on && (
<text x={mx} y={my - 2} fill="#7c8aa5" fontSize={8.5} textAnchor="middle">
{e.p}
</text>
)}
</g>
);
})}
{nodes.map((n) => {
const c = nodeColor(n.deg);
// Radius grows with degree and is capped at 16.
const r = Math.min(7 + n.deg * 1.6, 16);
const on = nodeOn(n.id);
// Labels longer than 10 characters are truncated with an ellipsis below.
return (
<g
key={n.id}
opacity={on ? 1 : 0.2}
onMouseEnter={() => setHover(n.id)}
onMouseLeave={() => setHover(null)}
style={{ cursor: "pointer" }}
>
<circle cx={n.x} cy={n.y} r={r} fill={c.fill} stroke={hover === n.id ? "#fff" : "#0b0d12"} strokeWidth={hover === n.id ? 2 : 1.5} />
<text x={n.x} y={n.y + r + 9} fill="#cbd5e1" fontSize={9.5} textAnchor="middle">
{n.id.length > 10 ? n.id.slice(0, 10) + "…" : n.id}
</text>
</g>
);
})}
</svg>
<div className="flex items-center gap-3 px-1 text-[10px] text-slate-500">
<span>{nodes.length} · {edges.length} </span>
<span className="flex items-center gap-1"><span className="h-2 w-2 rounded-full" style={{ background: "#8b5cf6" }} /> </span>
<span className="flex items-center gap-1"><span className="h-2 w-2 rounded-full" style={{ background: "#22d3ee" }} /> </span>
<span className="ml-auto"></span>
</div>
</div>
);
}
+2
View File
@@ -86,6 +86,8 @@ export interface IngestEvent {
done?: number;
total?: number;
chunks?: string[];
preview?: string; // 解析阶段:解析出的文本片段
triples?: Triple[]; // 抽实体阶段:LLM 抽出的知识三元组
error?: string;
}
+181 -104
View File
@@ -1,6 +1,21 @@
import { useRef, useState } from "react";
import { Upload, Search, Network, FileUp } from "lucide-react";
import {
Upload,
FileUp,
Search,
Network,
Database,
FileText,
Scissors,
Sparkles,
Share2,
CheckCircle2,
XCircle,
Loader2,
type LucideIcon,
} from "lucide-react";
import { ingestKb, ingestFile, streamIngest, searchKb, graphKb, type IngestEvent, type KbHit, type Triple } from "../lib/api";
import { GraphView } from "../components/GraphView";
import { Button, Input, Textarea, Badge, cn, useToast } from "../ui";
interface IngestLog {
@@ -8,17 +23,50 @@ interface IngestLog {
msg: string;
ok: boolean;
}
/** One timeline entry: an ingest stage and the latest message reported for it. */
interface Step {
  /** Detail text shown next to the stage label; may be empty. */
  msg: string;
  /** Stage name as reported by the ingest event stream. */
  stage: string;
}
/** Client-side state of one ingest job, accumulated from the ingest event stream. */
interface Progress {
  /** True while the job is still running. */
  active: boolean;
  /** Name of the most recently reported stage. */
  stage: string;
  /** Error text carried by the latest event, if any. */
  error?: string;

  /** Ordered timeline entries, one per stage seen so far. */
  steps: Step[];
  /** Preview snippet of the parsed text. */
  preview?: string;
  /** Chunk texts reported by the stream. */
  chunks: string[];
  /** Knowledge triples extracted so far. */
  triples: Triple[];

  /** Vectorization progress: items done. */
  vecDone?: number;
  /** Vectorization progress: total items. */
  vecTotal?: number;

  /** Generic done counter. NOTE(review): vectorization progress lives in vecDone; confirm this is still used. */
  done?: number;
  /** Generic total counter. NOTE(review): vectorization progress lives in vecTotal; confirm this is still used. */
  total?: number;
}
// 知识库管理:实时入库监控(解析→切块→向量化→写入 + 拆分可视化)+ 检索调试台
// 阶段元数据:图标 + 中文标签(与后端 IngestEvent.stage 对应)
const STAGE: Record<string, { icon: LucideIcon; label: string }> = {
: { icon: Upload, label: "解析文件" },
: { icon: FileText, label: "解析完成" },
: { icon: Scissors, label: "切块" },
: { icon: Sparkles, label: "向量化" },
Milvus: { icon: Database, label: "写入向量库" },
Bleve: { icon: Search, label: "写入全文索引" },
: { icon: Network, label: "抽取知识" },
Neo4j: { icon: Share2, label: "写入图谱" },
: { icon: CheckCircle2, label: "完成" },
: { icon: XCircle, label: "失败" },
};
/**
 * Removes duplicate triples, keeping the first occurrence of each and
 * preserving the input order.
 *
 * Two triples are duplicates only when `s`, `p` and `o` are all equal. The
 * identity key is the JSON encoding of the three fields rather than a
 * "|"-joined string, so a "|" inside a field cannot make two distinct triples
 * collide (e.g. s="a|b", p="c" versus s="a", p="b|c").
 *
 * @param ts Triples to de-duplicate; the array is not modified.
 * @returns A new array holding the unique triples.
 */
function dedupTriples(ts: Triple[]): Triple[] {
  const seen = new Set<string>();
  const out: Triple[] = [];
  for (const t of ts) {
    // JSON quoting/escaping makes the field boundaries unambiguous.
    const key = JSON.stringify([t.s, t.p, t.o]);
    if (seen.has(key)) continue;
    seen.add(key);
    out.push(t);
  }
  return out;
}
// 知识库管理:实时入库时间线(解析预览 / 切块 / 向量化 / 知识抽取实时浮现)+ 力导向知识图谱 + 混合检索。
export function KbView() {
const toast = useToast();
const [kb, setKb] = useState("docs");
@@ -46,25 +94,42 @@ export function KbView() {
const ingesting = prog?.active ?? false;
const follow = (job: string, label: string) => {
setProg({ active: true, stage: "提交", chunks: [] });
setProg({ active: true, stage: "提交", chunks: [], triples: [], steps: [{ stage: "提交", msg: label }] });
streamIngest(
job,
(ev: IngestEvent) =>
setProg((p) => ({
active: ev.stage !== "完成" && ev.stage !== "失败",
stage: ev.stage,
done: ev.done ?? p?.done,
total: ev.total ?? p?.total,
chunks: ev.chunks ?? p?.chunks ?? [],
error: ev.error,
})),
() =>
setProg((p) => {
const base: Progress = p ?? { active: true, stage: "提交", chunks: [], triples: [], steps: [] };
const steps = [...base.steps];
const i = steps.findIndex((s) => s.stage === ev.stage);
const msg = ev.msg ?? (i >= 0 ? steps[i].msg : "");
if (i >= 0) steps[i] = { stage: ev.stage, msg };
else steps.push({ stage: ev.stage, msg });
return {
active: ev.stage !== "完成" && ev.stage !== "失败",
stage: ev.stage,
chunks: ev.chunks ?? base.chunks,
preview: ev.preview ?? base.preview,
triples: ev.triples?.length ? dedupTriples([...base.triples, ...ev.triples]) : base.triples,
error: ev.error,
steps,
vecDone: ev.stage === "向量化" ? ev.done : base.vecDone,
vecTotal: ev.stage === "向量化" ? ev.total : base.vecTotal,
};
}),
() => {
setProg((p) => {
const ok = p?.stage !== "失败";
setLogs((l) => [{ t: stamp(), msg: ok ? `${label}${p?.total ?? 0}入库完成` : `${label}${p?.error ?? "失败"}`, ok }, ...l]);
toast.push(ok ? "success" : "error", ok ? `${label} 入库完成` : `${label} 入库失败`);
setLogs((l) => [{ t: stamp(), msg: ok ? `${label}:入库完成` : `${label}${p?.error ?? "失败"}`, ok }, ...l]);
if (ok) {
toast.push("success", `${label} 入库完成`);
void onGraph(); // 刷新右侧知识图谱(含新抽取的实体关系)
} else {
toast.push("error", `${label} 入库失败`);
}
return p ? { ...p, active: false } : null;
}),
});
},
() => setProg((p) => (p ? { ...p, active: false, stage: "连接中断" } : null)),
);
};
@@ -105,19 +170,20 @@ export function KbView() {
}
};
const pct = prog?.total ? Math.round(((prog.done ?? 0) / prog.total) * 100) : 0;
const vecPct = prog?.vecTotal ? Math.round(((prog.vecDone ?? 0) / prog.vecTotal) * 100) : 0;
const graphData = graph ?? prog?.triples ?? null;
return (
<div className="flex h-full flex-col">
<div className="flex items-center gap-2 border-b border-line bg-ink-900 px-4 py-2">
<span className="text-sm font-semibold text-slate-300"></span>
<Input className="h-8 w-40" value={kb} onChange={(e) => setKb(e.target.value)} placeholder="知识库名" title="知识库(Milvus kb 字段分区)" />
<span className="text-[11px] text-slate-500"> / / / </span>
<span className="text-[11px] text-slate-500"> / / / / </span>
</div>
<div className="flex min-h-0 flex-1">
{/* 左:入库 + 实时监控 */}
<section className="flex w-1/2 flex-col border-r border-line p-4">
{/* 左:入库 + 实时时间线 */}
<section className="flex w-1/2 flex-col overflow-y-auto border-r border-line p-4">
<h3 className="mb-2 text-xs font-semibold text-slate-400"></h3>
<div
onDragOver={(e) => {
@@ -133,7 +199,7 @@ export function KbView() {
}}
className={cn("relative rounded-md", dragOver && "ring-2 ring-brand")}
>
<Textarea className="h-24 w-full resize-none" value={text} onChange={(e) => setText(e.target.value)} placeholder="每行一条知识,或把文件拖到这里 / 点选择文件" />
<Textarea className="h-20 w-full resize-none" value={text} onChange={(e) => setText(e.target.value)} placeholder="每行一条知识,或把文件拖到这里 / 点选择文件" />
{dragOver && (
<div className="pointer-events-none absolute inset-0 flex items-center justify-center rounded-md bg-ink-950/85 text-xs font-medium text-brand-400">
@@ -148,71 +214,14 @@ export function KbView() {
<Button size="sm" icon={FileUp} onClick={() => fileRef.current?.click()} disabled={ingesting}>
</Button>
<input
ref={fileRef}
type="file"
accept=".txt,.md,.csv,.docx,.xlsx,.pdf"
onChange={(e) => onFile(e.target.files?.[0])}
disabled={ingesting}
className="hidden"
/>
<input ref={fileRef} type="file" accept=".txt,.md,.csv,.docx,.xlsx,.pdf" onChange={(e) => onFile(e.target.files?.[0])} disabled={ingesting} className="hidden" />
</div>
<span className="mt-1 text-[10px] text-slate-500"> txt/md/csv/docx/xlsx/pdfdocx/xlsx/pdf mcp-py </span>
{/* 实时流水线进度 */}
{prog && (
<div className="mt-3 rounded-lg border border-line bg-ink-850 p-2.5">
<div className="flex flex-wrap items-center gap-1.5">
{["解析", "切块", "向量化", "写Milvus", "写Bleve", "完成"].map((s) => {
const active = prog.stage.startsWith(s) || (s === "完成" && prog.stage === "完成");
const passed = stageOrder(prog.stage) > stageOrder(s);
return (
<span
key={s}
className={cn(
"rounded px-1.5 py-0.5 text-[10px]",
prog.stage === "失败"
? "bg-ink-800 text-slate-600"
: active
? "bg-brand text-white shadow-glow"
: passed
? "bg-success/15 text-success"
: "bg-ink-800 text-slate-600",
)}
>
{s}
</span>
);
})}
</div>
{prog.error && <p className="mt-1 text-[11px] text-danger"> {prog.error}</p>}
{prog.total ? (
<div className="mt-2">
<div className="h-1.5 w-full overflow-hidden rounded-full bg-ink-700">
<div className="h-full rounded-full bg-gradient-to-r from-brand to-accent transition-all" style={{ width: `${pct}%` }} />
</div>
<div className="mt-1 text-[10px] text-slate-500">
{prog.done ?? 0}/{prog.total} {pct}%
</div>
</div>
) : null}
{prog.chunks.length > 0 && (
<div className="mt-2">
<div className="text-[10px] font-medium text-slate-500"> {prog.chunks.length} </div>
<ul className="mt-1 max-h-24 space-y-0.5 overflow-auto">
{prog.chunks.map((c, i) => (
<li key={i} className="truncate rounded bg-ink-800 px-1.5 py-0.5 text-[10px] text-slate-400">
<span className="text-slate-600">#{i + 1}</span> {c}
</li>
))}
</ul>
</div>
)}
</div>
)}
{prog && <Timeline prog={prog} vecPct={vecPct} />}
<h3 className="mb-1 mt-4 text-xs font-semibold text-slate-400"></h3>
<ul className="flex-1 space-y-1 overflow-auto">
<ul className="space-y-1">
{logs.length === 0 && <li className="text-xs text-slate-600"></li>}
{logs.map((l, i) => (
<li key={i} className={cn("text-xs", l.ok ? "text-success" : "text-danger")}>
@@ -222,8 +231,8 @@ export function KbView() {
</ul>
</section>
{/* 右:检索调试台 */}
<section className="flex w-1/2 flex-col p-4">
{/* 右:检索台 + 知识图谱 */}
<section className="flex w-1/2 flex-col overflow-y-auto p-4">
<h3 className="mb-2 text-xs font-semibold text-slate-400"> + rerank</h3>
<div className="flex gap-2">
<Input className="flex-1" value={q} onChange={(e) => setQ(e.target.value)} onKeyDown={(e) => e.key === "Enter" && onSearch()} placeholder="输入查询,语义召回相关片段…" />
@@ -232,7 +241,7 @@ export function KbView() {
{searching ? "检索中…" : "检索"}
</Button>
</div>
<ul className="mt-3 max-h-[40%] space-y-2 overflow-auto">
<ul className="mt-3 space-y-2">
{hits === null && <li className="text-xs text-slate-600"></li>}
{hits !== null && hits.length === 0 && <li className="text-xs text-slate-600"> RAG </li>}
{hits?.map((h, i) => (
@@ -247,32 +256,100 @@ export function KbView() {
))}
</ul>
{/* 知识图谱(Neo4j / GraphRAG */}
<div className="mt-3 flex items-center justify-between border-t border-line pt-2">
<h3 className="text-xs font-semibold text-slate-400">Neo4j</h3>
<div className="mt-4 flex items-center justify-between border-t border-line pt-3">
<h3 className="text-xs font-semibold text-slate-400">Neo4j · </h3>
<Button size="sm" icon={Network} onClick={onGraph}>
</Button>
</div>
<ul className="mt-2 flex-1 space-y-1 overflow-auto">
{graph === null && <li className="text-[11px] text-slate-600"></li>}
{graph !== null && graph.length === 0 && <li className="text-[11px] text-slate-600"> chat + </li>}
{graph?.map((t, i) => (
<li key={i} className="flex items-center gap-1 text-[11px]">
<Badge tone="warn">{t.s}</Badge>
<span className="text-slate-500">{t.p}</span>
<Badge tone="success">{t.o}</Badge>
</li>
))}
</ul>
<div className="mt-2">
<GraphView triples={graphData ?? []} />
</div>
</section>
</div>
</div>
);
}
const STAGES = ["解析", "解析完成", "切块", "向量化", "写Milvus", "写Bleve", "完成"];
function stageOrder(stage: string): number {
const i = STAGES.findIndex((s) => stage.startsWith(s));
return i < 0 ? -1 : i;
/**
 * Renders the ingest stages as a vertical timeline: a status indicator, the
 * stage label and its message for each step, plus stage-specific details
 * (parsed-text preview, chunk list, vectorization progress bar). Below the
 * timeline, extracted triples are shown as chips together with a small live graph.
 *
 * @param prog Current ingest progress state.
 * @param vecPct Vectorization progress as a percentage, used for the bar width and label.
 */
function Timeline({ prog, vecPct }: { prog: Progress; vecPct: number }) {
return (
<div className="mt-3 rounded-lg border border-line bg-ink-850 p-3">
<ol className="relative ml-1 space-y-2 border-l border-line pl-4">
{prog.steps.map((s, i) => {
// Stages missing from STAGE fall back to the Loader2 icon and the raw stage name.
const meta = STAGE[s.stage] ?? { icon: Loader2, label: s.stage };
const Icon = meta.icon;
const isLast = i === prog.steps.length - 1;
// A step is an error only when it is the "失败" stage and an error text is present.
const isErr = prog.error && s.stage === "失败";
// The last step spins while the job is active; every other non-error step counts as done.
const status = isErr ? "error" : isLast && prog.active ? "active" : "done";
return (
<li key={i} className="relative">
<span className="absolute -left-[21px] top-0.5 flex h-3.5 w-3.5 items-center justify-center rounded-full bg-ink-850">
{status === "active" ? (
<Loader2 className="h-3.5 w-3.5 animate-spin text-accent-400" />
) : status === "error" ? (
<XCircle className="h-3.5 w-3.5 text-danger" />
) : (
<CheckCircle2 className="h-3.5 w-3.5 text-success" />
)}
</span>
<div className="flex items-center gap-2">
<Icon className="h-3.5 w-3.5 text-slate-400" />
<span className="text-xs font-medium text-slate-200">{meta.label}</span>
{s.msg && <span className="text-[11px] text-slate-500">{s.msg}</span>}
</div>
{/* Vectorization progress bar */}
{s.stage === "向量化" && prog.vecTotal ? (
<div className="mt-1">
<div className="h-1.5 w-full overflow-hidden rounded-full bg-ink-700">
<div className="h-full rounded-full bg-gradient-to-r from-brand to-accent transition-all" style={{ width: `${vecPct}%` }} />
</div>
<div className="mt-0.5 text-[10px] text-slate-500">{prog.vecDone ?? 0}/{prog.vecTotal} {vecPct}%</div>
</div>
) : null}
{/* Parsed-text preview */}
{s.stage === "解析完成" && prog.preview ? (
<p className="mt-1 max-h-16 overflow-hidden rounded bg-ink-900 px-2 py-1 text-[11px] leading-relaxed text-slate-400">{prog.preview}</p>
) : null}
{/* Chunk preview */}
{s.stage === "切块" && prog.chunks.length > 0 ? (
<ul className="mt-1 max-h-20 space-y-0.5 overflow-auto">
{prog.chunks.map((c, j) => (
<li key={j} className="truncate rounded bg-ink-900 px-1.5 py-0.5 text-[10px] text-slate-400">
<span className="text-slate-600">#{j + 1}</span> {c}
</li>
))}
</ul>
) : null}
</li>
);
})}
</ol>
{/* Extracted knowledge: triple chips (first 12 only) + live mini graph */}
{prog.triples.length > 0 && (
<div className="mt-3 border-t border-line pt-2">
<div className="mb-1.5 flex items-center gap-1.5 text-[11px] font-medium text-slate-400">
<Network className="h-3.5 w-3.5 text-brand-400" /> {prog.triples.length}
</div>
<div className="flex flex-wrap gap-1">
{prog.triples.slice(0, 12).map((t, i) => (
<span key={i} className="inline-flex items-center gap-1 rounded bg-ink-900 px-1.5 py-0.5 text-[10px]">
<span className="text-brand-400">{t.s}</span>
<span className="text-slate-600">{t.p}</span>
<span className="text-accent-400">{t.o}</span>
</span>
))}
{prog.triples.length > 12 && <span className="px-1 text-[10px] text-slate-600">+{prog.triples.length - 12}</span>}
</div>
<div className="mt-2">
<GraphView triples={prog.triples} height={200} />
</div>
</div>
)}
</div>
);
}