feat(kb): 入库可视化做厚 —— 文件解析/知识抽取过程 + 力导向知识图谱

把"进度条"升级成可观测的入库工作台,回应三点诉求:解析过程、知识抽取过程、丰富图谱。

- contract: IngestEvent 加 Preview(解析文本预览)+ Triples[]TripleView(抽出的三元组)。
- 后端回流:rag.Ingest 抽实体阶段把 LLM 抽出的三元组实时回流(边出现边渲染);
  gateway 解析完成回流文件类型 + 文本预览片段。
- 前端 GraphView.tsx:零依赖自建力导向布局(斥力+边弹簧+居中静态收敛),实体=节点
  按度着色(枢纽紫/关联青/叶子)、关系=带标签边、hover 高亮邻域、节点过多按度裁剪。
- 前端 KbView 重做:入库从"阶段徽标+进度条"→竖向时间线(解析预览/切块块/向量化进度/
  抽取知识三元组 chips + 实时小图谱逐步浮现);右侧知识图谱从扁平列表→GraphView,
  入库完成自动刷新整库图谱。

验证(Preview):入库一段多事实文本 → 时间线逐阶段点亮、抽出 17 条三元组实时浮现、
右侧力导向图渲染 sundynix-agentix/知识库 为枢纽 + 带标签关系边。tsc+vite+后端 build 通过。

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
Blizzard
2026-06-13 14:22:50 +08:00
parent d5dfb7a928
commit 72e008bfe8
6 changed files with 408 additions and 112 deletions
@@ -0,0 +1,169 @@
import { useMemo, useState } from "react";
import { Network } from "lucide-react";
import type { Triple } from "../lib/api";
import { EmptyState } from "../ui";
/** A positioned graph node: one entity taken from the triples, as produced by `layout`. */
interface GNode {
// Entity name; doubles as the node's unique key in the layout maps.
id: string;
// Final horizontal position, in the same units as the layout width W.
x: number;
// Final vertical position, in the same units as the layout height H.
y: number;
// Degree: how many triple endpoints (subject or object) referenced this entity.
deg: number;
}
/** A graph edge copied from one triple whose two endpoints both survived node cropping. */
interface GEdge {
// Subject entity name (id of the source node).
s: string;
// Object entity name (id of the target node).
o: string;
// Predicate text; rendered as the label at the edge midpoint.
p: string;
}
/**
 * Arranges triples as a graph using a small force-directed simulation
 * (pairwise repulsion + edge springs + a weak pull toward the centre).
 *
 * The simulation runs a fixed number of rounds synchronously, so the result is
 * static and deterministic for a given input; there are no dependencies.
 * When there are more than 60 distinct entities, only the 60 with the highest
 * degree are kept, together with the edges whose two endpoints are both kept.
 *
 * @param triples subject/predicate/object triples; entries with an empty subject or object are ignored
 * @param W       canvas width
 * @param H       canvas height
 * @returns the positioned nodes and the surviving edges
 */
function layout(triples: Triple[], W: number, H: number): { nodes: GNode[]; edges: GEdge[] } {
  const MAX_NODES = 60;
  const ROUNDS = 320;

  // Tally how many triple endpoints mention each entity.
  const degree = new Map<string, number>();
  const bump = (name: string): void => {
    degree.set(name, (degree.get(name) ?? 0) + 1);
  };
  triples.forEach((t) => {
    if (t.s && t.o) {
      bump(t.s);
      bump(t.o);
    }
  });

  // Crop: with too many entities keep only the highest-degree ones.
  let ids = Array.from(degree.keys());
  if (ids.length > MAX_NODES) {
    ids.sort((p, q) => degree.get(q)! - degree.get(p)!);
    ids = ids.slice(0, MAX_NODES);
  }
  const kept = new Set(ids);

  // Keep only the edges whose two endpoints are both still present.
  const edges: GEdge[] = [];
  for (const t of triples) {
    if (kept.has(t.s) && kept.has(t.o)) {
      edges.push({ s: t.s, o: t.o, p: t.p });
    }
  }

  // Seed the nodes evenly on a circle (deterministic, so re-layouts do not jitter).
  const cx = W / 2;
  const cy = H / 2;
  const radius = Math.min(W, H) * 0.36;
  const byId = new Map<string, GNode>();
  const placed: GNode[] = ids.map((id, k) => {
    const theta = (2 * Math.PI * k) / ids.length;
    const node: GNode = {
      id,
      x: cx + Math.cos(theta) * radius,
      y: cy + Math.sin(theta) * radius,
      deg: degree.get(id)!,
    };
    byId.set(id, node);
    return node;
  });

  for (let round = 0; round < ROUNDS; round += 1) {
    // Repulsion (Coulomb-like): every pair pushes apart with strength 2600 / distance².
    for (let i = 0; i < placed.length; i += 1) {
      const p = placed[i];
      for (let j = i + 1; j < placed.length; j += 1) {
        const q = placed[j];
        let ux = p.x - q.x;
        const uy = p.y - q.y;
        let distSq = ux * ux + uy * uy;
        if (distSq < 1) {
          // Nearly coincident nodes: clamp the distance and force a push along x.
          distSq = 1;
          ux = 1;
        }
        const dist = Math.sqrt(distSq);
        const push = 2600 / distSq;
        p.x += (ux / dist) * push;
        p.y += (uy / dist) * push;
        q.x -= (ux / dist) * push;
        q.y -= (uy / dist) * push;
      }
    }

    // Edge springs (rest length ~96): pull connected nodes together or apart.
    edges.forEach((edge) => {
      const from = byId.get(edge.s)!;
      const to = byId.get(edge.o)!;
      const vx = to.x - from.x;
      const vy = to.y - from.y;
      const len = Math.sqrt(vx * vx + vy * vy) || 1;
      const pull = (len - 96) * 0.012;
      from.x += (vx / len) * pull;
      from.y += (vy / len) * pull;
      to.x -= (vx / len) * pull;
      to.y -= (vy / len) * pull;
    });

    // Gentle centring, then clamp every node inside the canvas margins.
    placed.forEach((node) => {
      node.x += (cx - node.x) * 0.004;
      node.y += (cy - node.y) * 0.004;
      node.x = Math.max(20, Math.min(W - 20, node.x));
      node.y = Math.max(16, Math.min(H - 16, node.y));
    });
  }

  return { nodes: placed, edges };
}
/**
 * Picks the fill/text colour pair for a node from its degree.
 * Tiers are checked from the highest threshold down: degree >= 4 is a hub
 * (brand purple), degree >= 2 a secondary hub (accent cyan), anything else a leaf.
 */
function nodeColor(deg: number): { fill: string; text: string } {
  const tiers: Array<[number, string, string]> = [
    [4, "#8b5cf6", "#ede9fe"],
    [2, "#22d3ee", "#083344"],
  ];
  for (const [minDeg, fill, text] of tiers) {
    if (deg >= minDeg) {
      return { fill, text };
    }
  }
  return { fill: "#1a1f2d", text: "#cbd5e1" };
}
/**
 * GraphView renders knowledge triples as a force-directed graph: entities are
 * nodes, relations are labelled edges, and hovering a node highlights it
 * together with its direct neighbours while dimming everything else.
 *
 * @param triples triples to draw; an empty list renders the empty-state placeholder
 * @param height  SVG height in pixels (also the layout height); defaults to 360
 */
export function GraphView({ triples, height = 360 }: { triples: Triple[]; height?: number }) {
  const W = 560;
  const H = height;
  const [hover, setHover] = useState<string | null>(null);
  const { nodes, edges } = useMemo(() => layout(triples, W, H), [triples, H]);
  const pos = useMemo(() => new Map(nodes.map((n) => [n.id, n])), [nodes]);
  if (triples.length === 0) {
    return <EmptyState icon={Network} title="暂无图谱" desc="入库文本后,LLM 会抽取实体与关系,这里渲染为可交互的知识图谱。" />;
  }

  // The hovered id can outlive its node: when new triples arrive the node may be
  // cropped or replaced, and no mouseleave fires for an element that was removed.
  // Ignore such a stale id so the whole graph is not left dimmed.
  const focus = hover !== null && pos.has(hover) ? hover : null;

  // Collects the focused node plus every node sharing an edge with it.
  const neighbors = (id: string) => {
    const s = new Set<string>([id]);
    for (const e of edges) {
      if (e.s === id) s.add(e.o);
      if (e.o === id) s.add(e.s);
    }
    return s;
  };
  const active = focus !== null ? neighbors(focus) : null;
  const nodeOn = (id: string) => !active || active.has(id);
  const edgeOn = (e: GEdge) => focus === null || e.s === focus || e.o === focus;

  return (
    <div className="flex flex-col gap-2">
      <svg viewBox={`0 0 ${W} ${H}`} className="w-full rounded-md border border-line bg-ink-950/60" style={{ height }}>
        {edges.map((e, i) => {
          const a = pos.get(e.s);
          const b = pos.get(e.o);
          if (!a || !b) return null;
          const on = edgeOn(e);
          const mx = (a.x + b.x) / 2;
          const my = (a.y + b.y) / 2;
          return (
            <g key={i} opacity={on ? 1 : 0.12}>
              <line x1={a.x} y1={a.y} x2={b.x} y2={b.y} stroke="#39435a" strokeWidth={1} />
              {on && (
                <text x={mx} y={my - 2} fill="#7c8aa5" fontSize={8.5} textAnchor="middle">
                  {e.p}
                </text>
              )}
            </g>
          );
        })}
        {nodes.map((n) => {
          const c = nodeColor(n.deg);
          // Radius grows with degree but is capped so hubs do not swamp the canvas.
          const r = Math.min(7 + n.deg * 1.6, 16);
          const on = nodeOn(n.id);
          // Truncate by code point, not UTF-16 unit, so a surrogate pair is never cut in half.
          const chars = Array.from(n.id);
          const label = chars.length > 10 ? chars.slice(0, 10).join("") + "…" : n.id;
          return (
            <g
              key={n.id}
              opacity={on ? 1 : 0.2}
              onMouseEnter={() => setHover(n.id)}
              onMouseLeave={() => setHover(null)}
              style={{ cursor: "pointer" }}
            >
              <circle cx={n.x} cy={n.y} r={r} fill={c.fill} stroke={focus === n.id ? "#fff" : "#0b0d12"} strokeWidth={focus === n.id ? 2 : 1.5} />
              <text x={n.x} y={n.y + r + 9} fill="#cbd5e1" fontSize={9.5} textAnchor="middle">
                {label}
              </text>
            </g>
          );
        })}
      </svg>
      <div className="flex items-center gap-3 px-1 text-[10px] text-slate-500">
        <span>{nodes.length} · {edges.length} </span>
        <span className="flex items-center gap-1"><span className="h-2 w-2 rounded-full" style={{ background: "#8b5cf6" }} /> </span>
        <span className="flex items-center gap-1"><span className="h-2 w-2 rounded-full" style={{ background: "#22d3ee" }} /> </span>
        <span className="ml-auto"></span>
      </div>
    </div>
  );
}
+2
View File
@@ -86,6 +86,8 @@ export interface IngestEvent {
done?: number;
total?: number;
chunks?: string[];
preview?: string; // 解析阶段:解析出的文本片段
triples?: Triple[]; // 抽实体阶段:LLM 抽出的知识三元组
error?: string;
}
+181 -104
View File
@@ -1,6 +1,21 @@
import { useRef, useState } from "react";
import { Upload, Search, Network, FileUp } from "lucide-react";
import {
Upload,
FileUp,
Search,
Network,
Database,
FileText,
Scissors,
Sparkles,
Share2,
CheckCircle2,
XCircle,
Loader2,
type LucideIcon,
} from "lucide-react";
import { ingestKb, ingestFile, streamIngest, searchKb, graphKb, type IngestEvent, type KbHit, type Triple } from "../lib/api";
import { GraphView } from "../components/GraphView";
import { Button, Input, Textarea, Badge, cn, useToast } from "../ui";
interface IngestLog {
@@ -8,17 +23,50 @@ interface IngestLog {
msg: string;
ok: boolean;
}
/** One row of the ingest timeline: a pipeline stage plus the latest message reported for it. */
interface Step {
// Stage name as sent in IngestEvent.stage; also the key used to look up icon/label metadata.
stage: string;
// Most recent message for this stage (an empty string when none was reported).
msg: string;
}
/** Accumulated UI state of one ingest job, folded from the stream of IngestEvent messages. */
interface Progress {
// True while the job is running; cleared once the stage is "完成" or "失败", or the stream ends.
active: boolean;
// Stage of the most recently received event.
stage: string;
// NOTE(review): generic progress counters; presumably copied from IngestEvent.done/total — confirm they are still written.
done?: number;
total?: number;
// Chunk previews; replaced whenever an event carries `chunks`, otherwise carried over.
chunks: string[];
// Parsed-text preview; replaced whenever an event carries `preview`, otherwise carried over.
preview?: string;
// Knowledge triples collected so far; new ones are appended and de-duplicated.
triples: Triple[];
// Error text of the latest event (overwritten by every event, so it may be undefined).
error?: string;
// Timeline rows: one per distinct stage, updated in place when that stage reports again.
steps: Step[];
// Vectorisation progress; only updated by events whose stage is "向量化".
vecDone?: number;
vecTotal?: number;
}
// 知识库管理:实时入库监控(解析→切块→向量化→写入 + 拆分可视化)+ 检索调试台
/**
 * Per-stage display metadata (icon + label), keyed by the stage name the
 * backend sends in `IngestEvent.stage`. Stages without an entry are rendered
 * by the caller with a fallback.
 *
 * The keys follow the backend stage list, in pipeline order:
 * 解析 / 解析完成 / 切块 / 向量化 / 写Milvus / 写Bleve / 抽实体 / 写Neo4j / 完成 / 失败.
 * They must match those strings exactly, otherwise the lookup misses.
 */
const STAGE: Record<string, { icon: LucideIcon; label: string }> = {
  解析: { icon: Upload, label: "解析文件" },
  解析完成: { icon: FileText, label: "解析完成" },
  切块: { icon: Scissors, label: "切块" },
  向量化: { icon: Sparkles, label: "向量化" },
  写Milvus: { icon: Database, label: "写入向量库" },
  写Bleve: { icon: Search, label: "写入全文索引" },
  抽实体: { icon: Network, label: "抽取知识" },
  写Neo4j: { icon: Share2, label: "写入图谱" },
  完成: { icon: CheckCircle2, label: "完成" },
  失败: { icon: XCircle, label: "失败" },
};
/**
 * Removes duplicate triples, keeping the first occurrence of each and the
 * original order. Two triples are duplicates only when subject, predicate and
 * object are all equal.
 *
 * The identity key is the JSON encoding of `[s, p, o]` rather than the fields
 * joined with a separator: a joined key makes distinct triples collide when a
 * field contains the separator (e.g. "a|b","c","d" vs "a","b|c","d").
 *
 * @param ts triples, possibly containing repeats
 * @returns a new array without repeats; the input is not modified
 */
function dedupTriples(ts: Triple[]): Triple[] {
  const seen = new Set<string>();
  return ts.filter((t) => {
    const key = JSON.stringify([t.s, t.p, t.o]);
    if (seen.has(key)) return false;
    seen.add(key);
    return true;
  });
}
// 知识库管理:实时入库时间线(解析预览 / 切块 / 向量化 / 知识抽取实时浮现)+ 力导向知识图谱 + 混合检索。
export function KbView() {
const toast = useToast();
const [kb, setKb] = useState("docs");
@@ -46,25 +94,42 @@ export function KbView() {
const ingesting = prog?.active ?? false;
const follow = (job: string, label: string) => {
setProg({ active: true, stage: "提交", chunks: [] });
setProg({ active: true, stage: "提交", chunks: [], triples: [], steps: [{ stage: "提交", msg: label }] });
streamIngest(
job,
(ev: IngestEvent) =>
setProg((p) => ({
active: ev.stage !== "完成" && ev.stage !== "失败",
stage: ev.stage,
done: ev.done ?? p?.done,
total: ev.total ?? p?.total,
chunks: ev.chunks ?? p?.chunks ?? [],
error: ev.error,
})),
() =>
setProg((p) => {
const base: Progress = p ?? { active: true, stage: "提交", chunks: [], triples: [], steps: [] };
const steps = [...base.steps];
const i = steps.findIndex((s) => s.stage === ev.stage);
const msg = ev.msg ?? (i >= 0 ? steps[i].msg : "");
if (i >= 0) steps[i] = { stage: ev.stage, msg };
else steps.push({ stage: ev.stage, msg });
return {
active: ev.stage !== "完成" && ev.stage !== "失败",
stage: ev.stage,
chunks: ev.chunks ?? base.chunks,
preview: ev.preview ?? base.preview,
triples: ev.triples?.length ? dedupTriples([...base.triples, ...ev.triples]) : base.triples,
error: ev.error,
steps,
vecDone: ev.stage === "向量化" ? ev.done : base.vecDone,
vecTotal: ev.stage === "向量化" ? ev.total : base.vecTotal,
};
}),
() => {
setProg((p) => {
const ok = p?.stage !== "失败";
setLogs((l) => [{ t: stamp(), msg: ok ? `${label}${p?.total ?? 0}入库完成` : `${label}${p?.error ?? "失败"}`, ok }, ...l]);
toast.push(ok ? "success" : "error", ok ? `${label} 入库完成` : `${label} 入库失败`);
setLogs((l) => [{ t: stamp(), msg: ok ? `${label}:入库完成` : `${label}${p?.error ?? "失败"}`, ok }, ...l]);
if (ok) {
toast.push("success", `${label} 入库完成`);
void onGraph(); // 刷新右侧知识图谱(含新抽取的实体关系)
} else {
toast.push("error", `${label} 入库失败`);
}
return p ? { ...p, active: false } : null;
}),
});
},
() => setProg((p) => (p ? { ...p, active: false, stage: "连接中断" } : null)),
);
};
@@ -105,19 +170,20 @@ export function KbView() {
}
};
const pct = prog?.total ? Math.round(((prog.done ?? 0) / prog.total) * 100) : 0;
const vecPct = prog?.vecTotal ? Math.round(((prog.vecDone ?? 0) / prog.vecTotal) * 100) : 0;
const graphData = graph ?? prog?.triples ?? null;
return (
<div className="flex h-full flex-col">
<div className="flex items-center gap-2 border-b border-line bg-ink-900 px-4 py-2">
<span className="text-sm font-semibold text-slate-300"></span>
<Input className="h-8 w-40" value={kb} onChange={(e) => setKb(e.target.value)} placeholder="知识库名" title="知识库(Milvus kb 字段分区)" />
<span className="text-[11px] text-slate-500"> / / / </span>
<span className="text-[11px] text-slate-500"> / / / / </span>
</div>
<div className="flex min-h-0 flex-1">
{/* 左:入库 + 实时监控 */}
<section className="flex w-1/2 flex-col border-r border-line p-4">
{/* 左:入库 + 实时时间线 */}
<section className="flex w-1/2 flex-col overflow-y-auto border-r border-line p-4">
<h3 className="mb-2 text-xs font-semibold text-slate-400"></h3>
<div
onDragOver={(e) => {
@@ -133,7 +199,7 @@ export function KbView() {
}}
className={cn("relative rounded-md", dragOver && "ring-2 ring-brand")}
>
<Textarea className="h-24 w-full resize-none" value={text} onChange={(e) => setText(e.target.value)} placeholder="每行一条知识,或把文件拖到这里 / 点选择文件" />
<Textarea className="h-20 w-full resize-none" value={text} onChange={(e) => setText(e.target.value)} placeholder="每行一条知识,或把文件拖到这里 / 点选择文件" />
{dragOver && (
<div className="pointer-events-none absolute inset-0 flex items-center justify-center rounded-md bg-ink-950/85 text-xs font-medium text-brand-400">
@@ -148,71 +214,14 @@ export function KbView() {
<Button size="sm" icon={FileUp} onClick={() => fileRef.current?.click()} disabled={ingesting}>
</Button>
<input
ref={fileRef}
type="file"
accept=".txt,.md,.csv,.docx,.xlsx,.pdf"
onChange={(e) => onFile(e.target.files?.[0])}
disabled={ingesting}
className="hidden"
/>
<input ref={fileRef} type="file" accept=".txt,.md,.csv,.docx,.xlsx,.pdf" onChange={(e) => onFile(e.target.files?.[0])} disabled={ingesting} className="hidden" />
</div>
<span className="mt-1 text-[10px] text-slate-500"> txt/md/csv/docx/xlsx/pdfdocx/xlsx/pdf mcp-py </span>
{/* 实时流水线进度 */}
{prog && (
<div className="mt-3 rounded-lg border border-line bg-ink-850 p-2.5">
<div className="flex flex-wrap items-center gap-1.5">
{["解析", "切块", "向量化", "写Milvus", "写Bleve", "完成"].map((s) => {
const active = prog.stage.startsWith(s) || (s === "完成" && prog.stage === "完成");
const passed = stageOrder(prog.stage) > stageOrder(s);
return (
<span
key={s}
className={cn(
"rounded px-1.5 py-0.5 text-[10px]",
prog.stage === "失败"
? "bg-ink-800 text-slate-600"
: active
? "bg-brand text-white shadow-glow"
: passed
? "bg-success/15 text-success"
: "bg-ink-800 text-slate-600",
)}
>
{s}
</span>
);
})}
</div>
{prog.error && <p className="mt-1 text-[11px] text-danger"> {prog.error}</p>}
{prog.total ? (
<div className="mt-2">
<div className="h-1.5 w-full overflow-hidden rounded-full bg-ink-700">
<div className="h-full rounded-full bg-gradient-to-r from-brand to-accent transition-all" style={{ width: `${pct}%` }} />
</div>
<div className="mt-1 text-[10px] text-slate-500">
{prog.done ?? 0}/{prog.total} {pct}%
</div>
</div>
) : null}
{prog.chunks.length > 0 && (
<div className="mt-2">
<div className="text-[10px] font-medium text-slate-500"> {prog.chunks.length} </div>
<ul className="mt-1 max-h-24 space-y-0.5 overflow-auto">
{prog.chunks.map((c, i) => (
<li key={i} className="truncate rounded bg-ink-800 px-1.5 py-0.5 text-[10px] text-slate-400">
<span className="text-slate-600">#{i + 1}</span> {c}
</li>
))}
</ul>
</div>
)}
</div>
)}
{prog && <Timeline prog={prog} vecPct={vecPct} />}
<h3 className="mb-1 mt-4 text-xs font-semibold text-slate-400"></h3>
<ul className="flex-1 space-y-1 overflow-auto">
<ul className="space-y-1">
{logs.length === 0 && <li className="text-xs text-slate-600"></li>}
{logs.map((l, i) => (
<li key={i} className={cn("text-xs", l.ok ? "text-success" : "text-danger")}>
@@ -222,8 +231,8 @@ export function KbView() {
</ul>
</section>
{/* 右:检索调试台 */}
<section className="flex w-1/2 flex-col p-4">
{/* 右:检索台 + 知识图谱 */}
<section className="flex w-1/2 flex-col overflow-y-auto p-4">
<h3 className="mb-2 text-xs font-semibold text-slate-400"> + rerank</h3>
<div className="flex gap-2">
<Input className="flex-1" value={q} onChange={(e) => setQ(e.target.value)} onKeyDown={(e) => e.key === "Enter" && onSearch()} placeholder="输入查询,语义召回相关片段…" />
@@ -232,7 +241,7 @@ export function KbView() {
{searching ? "检索中…" : "检索"}
</Button>
</div>
<ul className="mt-3 max-h-[40%] space-y-2 overflow-auto">
<ul className="mt-3 space-y-2">
{hits === null && <li className="text-xs text-slate-600"></li>}
{hits !== null && hits.length === 0 && <li className="text-xs text-slate-600"> RAG </li>}
{hits?.map((h, i) => (
@@ -247,32 +256,100 @@ export function KbView() {
))}
</ul>
{/* 知识图谱(Neo4j / GraphRAG */}
<div className="mt-3 flex items-center justify-between border-t border-line pt-2">
<h3 className="text-xs font-semibold text-slate-400">Neo4j</h3>
<div className="mt-4 flex items-center justify-between border-t border-line pt-3">
<h3 className="text-xs font-semibold text-slate-400">Neo4j · </h3>
<Button size="sm" icon={Network} onClick={onGraph}>
</Button>
</div>
<ul className="mt-2 flex-1 space-y-1 overflow-auto">
{graph === null && <li className="text-[11px] text-slate-600"></li>}
{graph !== null && graph.length === 0 && <li className="text-[11px] text-slate-600"> chat + </li>}
{graph?.map((t, i) => (
<li key={i} className="flex items-center gap-1 text-[11px]">
<Badge tone="warn">{t.s}</Badge>
<span className="text-slate-500">{t.p}</span>
<Badge tone="success">{t.o}</Badge>
</li>
))}
</ul>
<div className="mt-2">
<GraphView triples={graphData ?? []} />
</div>
</section>
</div>
</div>
);
}
const STAGES = ["解析", "解析完成", "切块", "向量化", "写Milvus", "写Bleve", "完成"];
function stageOrder(stage: string): number {
const i = STAGES.findIndex((s) => stage.startsWith(s));
return i < 0 ? -1 : i;
/**
 * Timeline renders the ingest stages as a vertical list: a status marker, the
 * stage label and its message, plus stage-specific detail (vectorisation
 * progress bar, parsed-text preview, chunk previews). Below the list, the
 * triples extracted so far appear as chips and as a small live graph.
 *
 * @param prog   accumulated progress of the ingest job
 * @param vecPct vectorisation progress as a percentage
 */
function Timeline({ prog, vecPct }: { prog: Progress; vecPct: number }) {
  const finalIndex = prog.steps.length - 1;
  const tripleCount = prog.triples.length;

  return (
    <div className="mt-3 rounded-lg border border-line bg-ink-850 p-3">
      <ol className="relative ml-1 space-y-2 border-l border-line pl-4">
        {prog.steps.map((step, idx) => {
          // Unknown stages fall back to a generic icon and the raw stage name.
          const { icon: StageIcon, label } = STAGE[step.stage] ?? { icon: Loader2, label: step.stage };

          // Only the newest step of a running job spins; a failure step carrying an error is red.
          let status: "error" | "active" | "done" = "done";
          if (prog.error && step.stage === "失败") {
            status = "error";
          } else if (idx === finalIndex && prog.active) {
            status = "active";
          }

          let marker = <CheckCircle2 className="h-3.5 w-3.5 text-success" />;
          if (status === "active") {
            marker = <Loader2 className="h-3.5 w-3.5 animate-spin text-accent-400" />;
          } else if (status === "error") {
            marker = <XCircle className="h-3.5 w-3.5 text-danger" />;
          }

          const showVector = step.stage === "向量化" && Boolean(prog.vecTotal);
          const showPreview = step.stage === "解析完成" && Boolean(prog.preview);
          const showChunks = step.stage === "切块" && prog.chunks.length > 0;

          return (
            <li key={idx} className="relative">
              <span className="absolute -left-[21px] top-0.5 flex h-3.5 w-3.5 items-center justify-center rounded-full bg-ink-850">
                {marker}
              </span>
              <div className="flex items-center gap-2">
                <StageIcon className="h-3.5 w-3.5 text-slate-400" />
                <span className="text-xs font-medium text-slate-200">{label}</span>
                {step.msg ? <span className="text-[11px] text-slate-500">{step.msg}</span> : null}
              </div>
              {/* Vectorisation progress bar */}
              {showVector ? (
                <div className="mt-1">
                  <div className="h-1.5 w-full overflow-hidden rounded-full bg-ink-700">
                    <div className="h-full rounded-full bg-gradient-to-r from-brand to-accent transition-all" style={{ width: `${vecPct}%` }} />
                  </div>
                  <div className="mt-0.5 text-[10px] text-slate-500">{prog.vecDone ?? 0}/{prog.vecTotal} {vecPct}%</div>
                </div>
              ) : null}
              {/* Parsed-text preview */}
              {showPreview ? (
                <p className="mt-1 max-h-16 overflow-hidden rounded bg-ink-900 px-2 py-1 text-[11px] leading-relaxed text-slate-400">{prog.preview}</p>
              ) : null}
              {/* Chunk previews */}
              {showChunks ? (
                <ul className="mt-1 max-h-20 space-y-0.5 overflow-auto">
                  {prog.chunks.map((chunk, n) => (
                    <li key={n} className="truncate rounded bg-ink-900 px-1.5 py-0.5 text-[10px] text-slate-400">
                      <span className="text-slate-600">#{n + 1}</span> {chunk}
                    </li>
                  ))}
                </ul>
              ) : null}
            </li>
          );
        })}
      </ol>
      {/* Extracted knowledge: triple chips (first 12) + small live graph */}
      {tripleCount > 0 && (
        <div className="mt-3 border-t border-line pt-2">
          <div className="mb-1.5 flex items-center gap-1.5 text-[11px] font-medium text-slate-400">
            <Network className="h-3.5 w-3.5 text-brand-400" /> {tripleCount}
          </div>
          <div className="flex flex-wrap gap-1">
            {prog.triples.slice(0, 12).map((triple, n) => (
              <span key={n} className="inline-flex items-center gap-1 rounded bg-ink-900 px-1.5 py-0.5 text-[10px]">
                <span className="text-brand-400">{triple.s}</span>
                <span className="text-slate-600">{triple.p}</span>
                <span className="text-accent-400">{triple.o}</span>
              </span>
            ))}
            {tripleCount > 12 && <span className="px-1 text-[10px] text-slate-600">+{tripleCount - 12}</span>}
          </div>
          <div className="mt-2">
            <GraphView triples={prog.triples} height={200} />
          </div>
        </div>
      )}
    </div>
  );
}
+34 -1
View File
@@ -7,6 +7,7 @@ import (
"encoding/hex"
"encoding/json"
"errors"
"fmt"
"io"
"net/http"
"path/filepath"
@@ -74,7 +75,11 @@ func (h *Handler) runIngest(job, kb, filename string, data []byte, rawText strin
_ = h.bus.CompleteStream(job)
return
}
emit(contract.IngestEvent{Stage: "解析完成", Msg: "解析出 " + itoa(len([]rune(parsed))) + " 字"})
emit(contract.IngestEvent{
Stage: "解析完成",
Msg: fmt.Sprintf("%s · 解析出 %d 字", fileKind(filename), len([]rune(parsed))),
Preview: head(parsed, 240),
})
text = parsed
}
@@ -157,6 +162,34 @@ func itoa(n int) string {
return string(b)
}
// head returns the first n characters (counted in runes, not bytes) of s after
// trimming surrounding whitespace, and appends "…" when anything was cut off.
// It is used to build the parse preview.
//
// The string is walked rune by rune and sliced at a rune boundary instead of
// being converted to []rune, so a large parsed document is not copied just to
// take a short prefix. A negative n is treated as 0 rather than panicking on
// an out-of-range slice.
// NOTE(review): the kept prefix is returned as-is, so invalid UTF-8 bytes in it
// are no longer rewritten to U+FFFD as the []rune round-trip did.
func head(s string, n int) string {
	s = strings.TrimSpace(s)
	if n < 0 {
		n = 0
	}
	seen := 0
	for i := range s {
		// i is the byte offset of rune number `seen`; reaching rune n means
		// there is more text than requested, so cut right before it.
		if seen == n {
			return s[:i] + "…"
		}
		seen++
	}
	// n or fewer runes in total: nothing to truncate.
	return s
}
// fileKind maps a file name to a human-readable file-type label based on its
// extension (case-insensitive). Names with an unknown or missing extension are
// labelled as plain text.
func fileKind(filename string) string {
	labels := map[string]string{
		".docx":     "Word 文档",
		".xlsx":     "Excel 表格",
		".xls":      "Excel 表格",
		".pdf":      "PDF",
		".csv":      "CSV",
		".md":       "Markdown",
		".markdown": "Markdown",
	}
	ext := strings.ToLower(filepath.Ext(filename))
	if label, known := labels[ext]; known {
		return label
	}
	return "文本"
}
// parseFile 把文件字节转为纯文本:文本类直读,其余经 mcp-py parse_document(算法层)。
func (h *Handler) parseFile(ctx context.Context, filename string, data []byte) (string, error) {
switch strings.ToLower(filepath.Ext(filename)) {
+7 -1
View File
@@ -153,11 +153,17 @@ func (e *Engine) Ingest(ctx context.Context, kb, text string, onProgress func(co
// 图谱路:LLM 抽实体/关系 → Neo4j(可降级,不阻断向量入库)。
if e.graph.ready() && e.chatClient().ready() {
emit(contract.IngestEvent{Stage: "抽实体", Msg: "LLM 抽取知识三元组"})
emit(contract.IngestEvent{Stage: "抽实体", Msg: "LLM 正在抽取知识三元组"})
triples, terr := extractTriples(ctx, e.chatClient(), text)
if terr != nil {
log.Printf("[rag] 三元组抽取失败(图谱降级): %v", terr)
} else if len(triples) > 0 {
// 把抽出的三元组实时回流给 UI(边出现边渲染图谱)。
tv := make([]contract.TripleView, len(triples))
for i, t := range triples {
tv[i] = contract.TripleView{S: t.S, P: t.P, O: t.O}
}
emit(contract.IngestEvent{Stage: "抽实体", Total: len(triples), Triples: tv, Msg: "抽出 " + itoa(len(triples)) + " 条知识三元组"})
emit(contract.IngestEvent{Stage: "写Neo4j", Total: len(triples), Msg: itoa(len(triples)) + " 条三元组写入图谱"})
if n, gerr := e.graph.store(ctx, kb, triples); gerr != nil {
log.Printf("[rag] 写 Neo4j 失败(图谱降级): %v", gerr)
+15 -6
View File
@@ -67,14 +67,23 @@ type ExecEvent struct {
MS int64 `json:"ms,omitempty"` // end 事件的耗时(毫秒)
}
// TripleView is one knowledge triple (subject - predicate - object) streamed
// back to the UI, used to show the extraction process live and to feed the graph.
type TripleView struct {
S string `json:"s"`
P string `json:"p"`
O string `json:"o"`
}
// IngestEvent 是入库流水线的实时进度事件(经 sundynix.streams.<job_id> 回流给 UI)。
type IngestEvent struct {
Stage string `json:"stage"` // 解析/切块/向量化/写Milvus/写Bleve/完成/失败
Msg string `json:"msg,omitempty"` // 文案
Done int `json:"done,omitempty"` // 进度(如已向量化块数)
Total int `json:"total,omitempty"` // 总数
Chunks []string `json:"chunks,omitempty"` // 切块预览(切块阶段发一次)
Error string `json:"error,omitempty"`
Stage string `json:"stage"` // 解析/解析完成/切块/向量化/写Milvus/写Bleve/抽实体/写Neo4j/完成/失败
Msg string `json:"msg,omitempty"` // 文案
Done int `json:"done,omitempty"` // 进度(如已向量化块数)
Total int `json:"total,omitempty"` // 总数
Chunks []string `json:"chunks,omitempty"` // 切块预览(切块阶段发一次)
Preview string `json:"preview,omitempty"` // 解析阶段:解析出的文本片段预览
Triples []TripleView `json:"triples,omitempty"` // 抽实体阶段:LLM 抽出的知识三元组(实时浮现 + 喂图谱)
Error string `json:"error,omitempty"`
}
// ModelConfig 是一个模型后端的连接配置(provider 抽象,chat 与 embedding 同形)。