feat: 实时入库监控 + 向量拆分可视化(异步入库 + 进度 SSE)

入库从同步改为异步流水线 + 进度回流(复用 token 流 NATS streaming)。
UI 实时看到 解析→切块→向量化(分批)→写入 各阶段 + 拆分块预览。

- shared: contract.IngestEvent(stage/done/total/chunks/error)
- mcp-go: rag.Ingest 加 onProgress + 分批向量化(10/批)逐批回报;kb_ingest 带 job_id
  把进度发到 sundynix.streams.<job_id> + CompleteStream
- gateway: 入库异步返回 job_id,后台 runIngest 发进度;GET /kb/ingest/:id/stream SSE
- frontend: streamIngest(EventSource);KbView 实时进度面板(阶段徽标+进度条+拆分列表)
- 验证: build✓+e2e PASS; 浏览器 12 行→6 阶段点亮+进度条 12/12+拆分 12 块逐条

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
Blizzard
2026-06-11 10:33:36 +08:00
parent 3550a22557
commit 2d5fd2fca5
8 changed files with 358 additions and 63 deletions
+38 -8
View File
@@ -47,27 +47,57 @@ export function streamTokens(
return () => es.close();
}
// ingestKb: POST /api/v1/kb/ingest,把文本入库(→ mcp-go kb_ingest:切块/embedding/Milvus)。
/**
 * One progress event of an ingestion job, as parsed from the ingest SSE stream.
 * Mirrors the backend `contract.IngestEvent`.
 */
export interface IngestEvent {
  /** Label of the pipeline stage this event reports. */
  stage: string;
  /** Number of units already processed; used together with `total` for the progress bar. */
  done?: number;
  /** Total number of units to process. */
  total?: number;
  /** Text of the chunks produced by splitting, for preview. */
  chunks?: string[];
  /** Optional free-form message (not consumed by the visible UI code). */
  msg?: string;
  /** Error description when the job failed. */
  error?: string;
}
/**
 * Submits raw text for asynchronous ingestion into a knowledge base.
 *
 * Sends `POST /api/v1/kb/ingest` with `{ kb, text }` as a JSON body. The
 * gateway answers immediately with a `job_id`; progress is then followed
 * through the ingest SSE stream for that job.
 *
 * @param kb - Name of the target knowledge base.
 * @param text - Text to ingest.
 * @returns The `job_id` reported by the gateway.
 * @throws Error when the response status is not OK or no `job_id` is present.
 *   The message is the gateway's `error` field when available, otherwise
 *   `ingest failed: <status>`. Network-level `fetch` rejections propagate as-is.
 */
export async function ingestKb(kb: string, text: string): Promise<string> {
  const res = await fetch(`${GATEWAY}/api/v1/kb/ingest`, {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify({ kb, text }),
  });

  let data: { job_id?: string; error?: string } = {};
  try {
    const parsed: unknown = await res.json();
    if (parsed !== null && typeof parsed === "object") {
      data = parsed as { job_id?: string; error?: string };
    }
  } catch {
    // The body was not JSON (e.g. an HTML error page from a proxy). Keep the
    // empty payload so the status-based error below is reported instead of a
    // SyntaxError that hides the HTTP status.
  }

  if (!res.ok || !data.job_id) throw new Error(data.error ?? `ingest failed: ${res.status}`);
  return data.job_id;
}
/**
 * Uploads a file for asynchronous ingestion into a knowledge base.
 *
 * Sends `POST /api/v1/kb/ingest_file` as multipart form data with the fields
 * `kb` and `file`. The gateway answers immediately with a `job_id`; progress
 * is then followed through the ingest SSE stream for that job.
 *
 * @param kb - Name of the target knowledge base.
 * @param file - File to upload.
 * @returns The `job_id` reported by the gateway.
 * @throws Error when the response status is not OK or no `job_id` is present.
 *   The message is the gateway's `error` field when available, otherwise
 *   `ingest file failed: <status>`. Network-level `fetch` rejections propagate as-is.
 */
export async function ingestFile(kb: string, file: File): Promise<string> {
  const fd = new FormData();
  fd.append("kb", kb);
  fd.append("file", file);
  // No explicit Content-Type: fetch derives the multipart boundary from the FormData body.
  const res = await fetch(`${GATEWAY}/api/v1/kb/ingest_file`, { method: "POST", body: fd });

  let data: { job_id?: string; error?: string } = {};
  try {
    const parsed: unknown = await res.json();
    if (parsed !== null && typeof parsed === "object") {
      data = parsed as { job_id?: string; error?: string };
    }
  } catch {
    // The body was not JSON (e.g. an HTML error page from a proxy). Keep the
    // empty payload so the status-based error below is reported instead of a
    // SyntaxError that hides the HTTP status.
  }

  if (!res.ok || !data.job_id) throw new Error(data.error ?? `ingest file failed: ${res.status}`);
  return data.job_id;
}
/**
 * Subscribes to the progress of an ingestion job over SSE
 * (`GET /api/v1/kb/ingest/:id/stream`).
 *
 * @param jobId - Job identifier returned by the ingest endpoints.
 * @param onEvent - Called with every successfully parsed `progress` event.
 * @param onDone - Called once when the server sends the `done` event.
 * @param onError - Called once when the connection reports an error.
 * @returns A function that closes the stream. After the stream has been
 *   closed (by the caller, by `done`, or by an error) no further
 *   `onDone`/`onError` callback is invoked.
 */
export function streamIngest(
  jobId: string,
  onEvent: (ev: IngestEvent) => void,
  onDone: () => void,
  onError?: () => void,
): () => void {
  // Escape the id so characters such as "/" or "?" cannot alter the request path.
  const es = new EventSource(`${GATEWAY}/api/v1/kb/ingest/${encodeURIComponent(jobId)}/stream`);

  // Terminal transitions happen at most once; the first one wins.
  let settled = false;
  const settle = (notify?: () => void): void => {
    if (settled) return;
    settled = true;
    es.close();
    notify?.();
  };

  es.addEventListener("progress", (e) => {
    let ev: IngestEvent;
    try {
      ev = JSON.parse((e as MessageEvent<string>).data) as IngestEvent;
    } catch (err) {
      // A malformed frame must not throw out of the listener; skip it and keep the stream alive.
      console.warn("streamIngest: dropping malformed progress event", err);
      return;
    }
    onEvent(ev);
  });
  es.addEventListener("done", () => settle(onDone));
  // Close on error so the browser's automatic reconnect does not re-subscribe to the job.
  es.onerror = () => settle(onError);

  return () => settle();
}
export interface KbHit {
+112 -21
View File
@@ -1,5 +1,5 @@
import { useRef, useState } from "react";
import { ingestKb, ingestFile, searchKb, type KbHit } from "../lib/api";
import { ingestKb, ingestFile, streamIngest, searchKb, type IngestEvent, type KbHit } from "../lib/api";
interface IngestLog {
t: string;
@@ -7,13 +7,21 @@ interface IngestLog {
ok: boolean;
}
// 知识库管理:入库监控(切块/embedding/Milvus+ 检索调试台(带分数与来源)。
// State of the live ingestion panel for the job currently being followed.
interface Progress {
  // Label of the most recent pipeline stage reported for the job.
  stage: string;
  // True while the job is still running; the `ingesting` flag is derived from it.
  active: boolean;
  // Units processed so far / units in total; both feed the progress bar.
  done?: number;
  total?: number;
  // Chunk texts shown in the split preview list.
  chunks: string[];
  // Error text shown in the panel when present.
  error?: string;
}
// 知识库管理:实时入库监控(解析→切块→向量化→写入 + 拆分可视化)+ 检索调试台。
export function KbView() {
const [kb, setKb] = useState("docs");
const [text, setText] = useState("");
const [logs, setLogs] = useState<IngestLog[]>([]);
const [ingesting, setIngesting] = useState(false);
const [prog, setProg] = useState<Progress | null>(null);
const fileRef = useRef<HTMLInputElement>(null);
const [q, setQ] = useState("");
@@ -23,31 +31,54 @@ export function KbView() {
const [err, setErr] = useState("");
const stamp = () => new Date().toLocaleTimeString();
const ingesting = prog?.active ?? false;
// Subscribe to the progress stream of ingest job `job` and mirror it into the
// progress panel. `label` prefixes the log line written when the stream ends.
//
// The latest progress is tracked in closure variables instead of being read
// back inside a `setProg` updater: state updaters must be pure (React may
// invoke them more than once, e.g. under StrictMode), so appending to the log
// from inside one could add duplicate log lines.
//
// NOTE(review): the close function returned by streamIngest is discarded, so
// the stream is not torn down if the component unmounts mid-job.
const follow = (job: string, label: string) => {
  let stage = "提交";
  let done: number | undefined;
  let total: number | undefined;
  let chunks: string[] = [];
  let error: string | undefined;

  // Push the current snapshot into component state.
  const publish = (active: boolean) => setProg({ active, stage, done, total, chunks, error });
  // Prepend one entry to the ingest log.
  const log = (msg: string, ok: boolean) => setLogs((l) => [{ t: stamp(), msg, ok }, ...l]);

  publish(true);
  streamIngest(
    job,
    (ev: IngestEvent) => {
      stage = ev.stage;
      // Events may omit counters/chunks; keep the last known values in that case.
      done = ev.done ?? done;
      total = ev.total ?? total;
      chunks = ev.chunks ?? chunks;
      error = ev.error;
      publish(stage !== "完成" && stage !== "失败");
    },
    () => {
      const ok = stage !== "失败";
      log(ok ? `${label}${total ?? 0} 块入库完成` : `${label}${error ?? "失败"}`, ok);
      publish(false);
    },
    () => {
      // Connection lost before the job reported completion: record it instead of failing silently.
      stage = "连接中断";
      log(`${label}: 连接中断`, false);
      publish(false);
    },
  );
};
// Submit the textarea content for asynchronous ingestion, then follow the
// resulting job's progress stream. Blank input is ignored; a failed submission
// is recorded in the ingest log.
//
// NOTE(review): `ingesting` only becomes true once follow() runs, so the
// submit control is presumably not disabled while the POST is in flight —
// confirm whether a second click during that window needs guarding.
const onIngest = async () => {
  if (!text.trim()) return;
  try {
    const job = await ingestKb(kb, text);
    // Clear the input only after the gateway accepted the job.
    setText("");
    follow(job, "文本");
  } catch (e: unknown) {
    // Anything can be thrown; only Error instances carry a usable message.
    const message = e instanceof Error ? e.message : String(e);
    setLogs((l) => [{ t: stamp(), msg: message, ok: false }, ...l]);
  }
};
// Upload the selected file for asynchronous ingestion, then follow the
// resulting job's progress stream under the file's name. A failed upload is
// recorded in the ingest log.
//
// NOTE(review): `ingesting` only becomes true once follow() runs, so the
// upload control is presumably not disabled while the POST is in flight —
// confirm whether a second selection during that window needs guarding.
const onFile = async (file?: File) => {
  if (!file) return;
  try {
    const job = await ingestFile(kb, file);
    follow(job, file.name);
  } catch (e: unknown) {
    // Anything can be thrown; only Error instances carry a usable message.
    const message = e instanceof Error ? e.message : String(e);
    setLogs((l) => [{ t: stamp(), msg: `${file.name}: ${message}`, ok: false }, ...l]);
  } finally {
    // Reset the file input so choosing the same file again fires a change event.
    if (fileRef.current) fileRef.current.value = "";
  }
};
@@ -66,6 +97,8 @@ export function KbView() {
}
};
const pct = prog?.total ? Math.round(((prog.done ?? 0) / prog.total) * 100) : 0;
return (
<div className="flex h-full flex-col">
<div className="flex items-center gap-2 border-b bg-white px-4 py-2">
@@ -77,18 +110,18 @@ export function KbView() {
placeholder="知识库名"
title="知识库(Milvus kb 字段分区)"
/>
<span className="text-[11px] text-gray-400"> / embedding / Milvus </span>
<span className="text-[11px] text-gray-400"> / / / </span>
</div>
<div className="flex min-h-0 flex-1">
{/* 左:入库 + 监控日志 */}
{/* 左:入库 + 实时监控 */}
<section className="flex w-1/2 flex-col border-r p-4">
<h3 className="mb-2 text-xs font-semibold text-gray-600"></h3>
<h3 className="mb-2 text-xs font-semibold text-gray-600"></h3>
<textarea
className="h-40 w-full resize-none rounded border p-2 text-sm"
className="h-24 w-full resize-none rounded border p-2 text-sm"
value={text}
onChange={(e) => setText(e.target.value)}
placeholder={"每行一条知识,例如:\nsundynix 用 Milvus 做向量库\nsundynix 用 NATS 做消息总线"}
placeholder={"每行一条知识,或上传文件"}
/>
<div className="mt-2 flex items-center gap-2">
<button
@@ -109,7 +142,59 @@ export function KbView() {
/>
</div>
<span className="mt-1 text-[10px] text-gray-400"> txt/md/csv/docx/xlsx/pdfdocx/xlsx/pdf mcp-py </span>
<h3 className="mb-1 mt-4 text-xs font-semibold text-gray-600"></h3>
{/* 实时流水线进度 */}
{prog && (
<div className="mt-3 rounded border bg-gray-50 p-2">
<div className="flex items-center gap-2 text-xs">
{["解析", "切块", "向量化", "写Milvus", "写Bleve", "完成"].map((s) => {
const active = prog.stage.startsWith(s) || (s === "完成" && prog.stage === "完成");
const passed = stageOrder(prog.stage) > stageOrder(s);
return (
<span
key={s}
className={`rounded px-1.5 py-0.5 text-[10px] ${
prog.stage === "失败"
? "bg-gray-100 text-gray-400"
: active
? "bg-violet-600 text-white"
: passed
? "bg-emerald-100 text-emerald-700"
: "bg-gray-100 text-gray-400"
}`}
>
{s}
</span>
);
})}
</div>
{prog.error && <p className="mt-1 text-[11px] text-rose-600"> {prog.error}</p>}
{prog.total ? (
<div className="mt-2">
<div className="h-1.5 w-full overflow-hidden rounded bg-gray-200">
<div className="h-full bg-violet-500 transition-all" style={{ width: `${pct}%` }} />
</div>
<div className="mt-0.5 text-[10px] text-gray-500">
{prog.done ?? 0}/{prog.total} {pct}%
</div>
</div>
) : null}
{prog.chunks.length > 0 && (
<div className="mt-2">
<div className="text-[10px] font-medium text-gray-500"> {prog.chunks.length} </div>
<ul className="mt-1 max-h-24 space-y-0.5 overflow-auto">
{prog.chunks.map((c, i) => (
<li key={i} className="truncate rounded bg-white px-1.5 py-0.5 text-[10px] text-gray-600">
<span className="text-gray-400">#{i + 1}</span> {c}
</li>
))}
</ul>
</div>
)}
</div>
)}
<h3 className="mb-1 mt-4 text-xs font-semibold text-gray-600"></h3>
<ul className="flex-1 space-y-1 overflow-auto">
{logs.length === 0 && <li className="text-xs text-gray-400"></li>}
{logs.map((l, i) => (
@@ -122,7 +207,7 @@ export function KbView() {
{/* 右:检索调试台 */}
<section className="flex w-1/2 flex-col p-4">
<h3 className="mb-2 text-xs font-semibold text-gray-600"></h3>
<h3 className="mb-2 text-xs font-semibold text-gray-600"> + rerank</h3>
<div className="flex gap-2">
<input
className="flex-1 rounded border px-2 py-1 text-sm"
@@ -157,7 +242,7 @@ export function KbView() {
{hits?.map((h, i) => (
<li key={i} className="rounded border bg-gray-50 p-2">
<div className="mb-1 flex items-center gap-2 text-[10px]">
<span className="rounded bg-sky-100 px-1.5 py-0.5 text-sky-700">Milvus </span>
<span className="rounded bg-sky-100 px-1.5 py-0.5 text-sky-700"></span>
<span className="text-gray-400">#{i + 1}</span>
<span className="ml-auto font-mono text-violet-600">{h.score.toFixed(3)}</span>
</div>
@@ -170,3 +255,9 @@ export function KbView() {
</div>
);
}
// Pipeline stage labels in the order they are expected to occur.
const STAGES = ["解析", "解析完成", "切块", "向量化", "写Milvus", "写Bleve", "完成"];

/**
 * Returns the position in `STAGES` of the entry that `stage` starts with, or
 * -1 when no entry matches.
 *
 * When several entries are prefixes of `stage`, the longest one wins. A
 * first-match search would resolve "解析完成" to "解析" (index 0), because
 * "解析" is itself a prefix of "解析完成", making the longer entry unreachable.
 *
 * @param stage - Stage label, optionally followed by extra text.
 * @returns Index into `STAGES`, or -1 for unknown labels.
 */
function stageOrder(stage: string): number {
  let bestIndex = -1;
  let bestLength = -1;
  for (const [index, name] of STAGES.entries()) {
    if (name.length > bestLength && stage.startsWith(name)) {
      bestIndex = index;
      bestLength = name.length;
    }
  }
  return bestIndex;
}