3550a22557
入库从纯文本升级为多文件类型:解析(mcp-py 算法层)与切块/embedding 解耦。 上传文件 → Gateway 按类型路由 → mcp-py parse_document 解析为文本 → kb_ingest。 - mcp-py: parsers.py(docx=python-docx / xlsx=openpyxl / pdf=pypdf / csv / txt→文本); parse_document 工具做真(base64 文件→文本,线程池跑 CPU 密集解析);pyproject 加依赖 - gateway: POST /api/v1/kb/ingest_file(multipart);parseFile 文本类直读、office/pdf→mcp-py - nats-server.conf: max_payload 8MB(容纳 base64 文件经工具调用;大文件应走对象存储) - frontend: KbView 加文件上传(accept docx/xlsx/pdf/csv...);api.ingestFile - 验证: 全模块 build✓ + e2e PASS; live——4 类文件上传→mcp-py 解析→入库→检索命中: docx(营收报告)/xlsx(销量表行)/pdf(Q2计划)/csv(城市人口) 全部正确 - 边界: 扫描件/版面 OCR(MinerU/PaddleOCR)推迟;大文件 base64 走 NATS 受 max_payload 限,生产应走对象存储(MinIO) Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
109 lines
3.9 KiB
TypeScript
109 lines
3.9 KiB
TypeScript
// Gateway HTTP/SSE 客户端:提交 DSL 任务、订阅 Token 流、登记偏好记忆。
|
||
import type { TaskDsl } from "./dsl";
|
||
|
||
/**
 * Base URL of the Gateway HTTP/SSE API.
 *
 * Read from the `VITE_GATEWAY` build-time environment variable when it is set;
 * otherwise defaults to `http://localhost:8080`. During development the client
 * connects to the Gateway directly; after Wails packaging this can be pointed
 * at a local backend address or routed through Go bindings.
 */
export const GATEWAY: string =
  (import.meta.env.VITE_GATEWAY as string | undefined) ?? "http://localhost:8080";
|
||
|
||
/**
 * Caller identity attached to Gateway requests that are scoped to a user.
 * The values are forwarded verbatim as the `X-User-ID` and `X-Session-ID`
 * request headers by the functions that accept an `Identity`.
 */
export interface Identity {
  /** Value sent in the `X-User-ID` header. */
  userId: string;
  /** Value sent in the `X-Session-ID` header. */
  sessionId: string;
}
|
||
|
||
/**
 * Submits a DSL task to the Gateway via `POST /api/v1/tasks`.
 *
 * @param dsl - Task description; serialized as the JSON request body.
 * @param id - Caller identity, forwarded in the `X-User-ID` / `X-Session-ID` headers.
 * @returns The `task_id` field of the Gateway's JSON response.
 * @throws Error when the response status is not OK; the message contains the
 *   HTTP status and the raw response body text.
 */
export async function submitTask(dsl: TaskDsl, id: Identity): Promise<string> {
  const res = await fetch(`${GATEWAY}/api/v1/tasks`, {
    method: "POST",
    headers: {
      "Content-Type": "application/json",
      "X-User-ID": id.userId,
      "X-Session-ID": id.sessionId,
    },
    body: JSON.stringify(dsl),
  });

  // Check the status before touching the body as JSON: on failure the body is
  // read as plain text so it can be included in the error message as-is.
  if (!res.ok) throw new Error(`submit failed: ${res.status} ${await res.text()}`);

  const data = (await res.json()) as { task_id: string };
  return data.task_id;
}
|
||
|
||
/**
 * Subscribes to the SSE stream at `/api/v1/tasks/:id/stream`.
 *
 * `EventSource` cannot attach custom request headers; the stream is addressed
 * by task id only, so no identity headers are sent.
 *
 * @param taskId - Id of the task whose stream to open. It is URL-encoded
 *   before being placed into the request path.
 * @param onToken - Called with the `data` payload of every `token` event.
 * @param onDone - Called once when a `done` event arrives, after the
 *   connection has been closed.
 * @param onError - Optional; called with the error event when the connection
 *   reports an error. The connection is closed first, so the browser's
 *   automatic reconnect does not kick in.
 * @returns A function that closes the underlying `EventSource`.
 */
export function streamTokens(
  taskId: string,
  onToken: (t: string) => void,
  onDone: () => void,
  onError?: (e: unknown) => void,
): () => void {
  // Encode the id so characters such as `/`, `?` or `#` cannot alter the path.
  const url = `${GATEWAY}/api/v1/tasks/${encodeURIComponent(taskId)}/stream`;
  const es = new EventSource(url);

  es.addEventListener("token", (e) => {
    onToken((e as MessageEvent<string>).data);
  });

  es.addEventListener("done", () => {
    // Close before notifying so the callback observes a finished stream.
    es.close();
    onDone();
  });

  es.onerror = (e) => {
    es.close();
    onError?.(e);
  };

  return () => es.close();
}
|
||
|
||
/**
 * Ingests plain text into a knowledge base via `POST /api/v1/kb/ingest`
 * (per the original note: routed to mcp-go `kb_ingest` for
 * chunking / embedding / Milvus storage).
 *
 * @param kb - Knowledge-base name, sent as the `kb` field of the JSON body.
 * @param text - Text to ingest, sent as the `text` field of the JSON body.
 * @returns The `message` field of the response, or `"ok"` when it is absent.
 * @throws Error when the response status is not OK. The message is the
 *   response's `error` field when the body is JSON carrying one, otherwise
 *   `ingest failed: <status>`.
 * @throws SyntaxError when a successful response body is not valid JSON.
 */
export async function ingestKb(kb: string, text: string): Promise<string> {
  const res = await fetch(`${GATEWAY}/api/v1/kb/ingest`, {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify({ kb, text }),
  });

  // Read the body as text first. Error responses (e.g. from a proxy) are not
  // guaranteed to be JSON, and parsing them eagerly would raise a SyntaxError
  // that hides the HTTP status from the caller.
  const raw = await res.text();

  if (!res.ok) {
    let detail: string | undefined;
    try {
      const parsed = JSON.parse(raw) as { error?: unknown } | null;
      if (typeof parsed?.error === "string") detail = parsed.error;
    } catch {
      // Body is not JSON; fall back to the status-based message below.
    }
    throw new Error(detail ?? `ingest failed: ${res.status}`);
  }

  const data = JSON.parse(raw) as { message?: string; error?: string } | null;
  return data?.message ?? "ok";
}
|
||
|
||
/**
 * Uploads a file for ingestion via `POST /api/v1/kb/ingest_file` as
 * `multipart/form-data` (per the original note: docx/xlsx/pdf… are parsed
 * by mcp-py before being stored).
 *
 * No `Content-Type` header is set explicitly so that `fetch` can generate the
 * multipart boundary from the `FormData` body.
 *
 * @param kb - Knowledge-base name, sent as the `kb` form field.
 * @param file - File to upload, sent as the `file` form field.
 * @returns A human-readable summary built from the file name, the `chars`
 *   count reported by the server (0 when absent) and the `message` field
 *   (`"ok"` when absent).
 * @throws Error when the response status is not OK. The message is the
 *   response's `error` field when the body is JSON carrying one, otherwise
 *   `ingest file failed: <status>`.
 * @throws SyntaxError when a successful response body is not valid JSON.
 */
export async function ingestFile(kb: string, file: File): Promise<string> {
  const fd = new FormData();
  fd.append("kb", kb);
  fd.append("file", file);

  const res = await fetch(`${GATEWAY}/api/v1/kb/ingest_file`, { method: "POST", body: fd });

  // Read the body as text first. Rejected uploads (e.g. payload too large) may
  // come back with a non-JSON body; parsing eagerly would raise a SyntaxError
  // that hides the HTTP status from the caller.
  const raw = await res.text();

  if (!res.ok) {
    let detail: string | undefined;
    try {
      const parsed = JSON.parse(raw) as { error?: unknown } | null;
      if (typeof parsed?.error === "string") detail = parsed.error;
    } catch {
      // Body is not JSON; fall back to the status-based message below.
    }
    throw new Error(detail ?? `ingest file failed: ${res.status}`);
  }

  const data = JSON.parse(raw) as { message?: string; chars?: number; error?: string } | null;
  return `${file.name}:解析 ${data?.chars ?? 0} 字 → ${data?.message ?? "ok"}`;
}
|
||
|
||
/** A single knowledge-base search result as returned in the `hits` array of `/api/v1/kb/search`. */
export interface KbHit {
  /** Text of the matched entry. */
  text: string;
  /** Score reported by the server for this hit (scale and ordering are defined server-side). */
  score: number;
}
|
||
|
||
/**
 * Queries a knowledge base via `POST /api/v1/kb/search`
 * (per the original note: routed to mcp-go `kb_search`, results carry scores).
 *
 * @param kb - Knowledge-base name, sent as the `kb` field of the JSON body.
 * @param q - Query string, sent as the `q` field.
 * @param topK - Sent as the `topK` field; defaults to 5.
 * @returns The `hits` array of the response, or an empty array when absent.
 * @throws Error when the response status is not OK. The message is the
 *   response's `error` field when the body is JSON carrying one, otherwise
 *   `search failed: <status>`.
 * @throws SyntaxError when a successful response body is not valid JSON.
 */
export async function searchKb(kb: string, q: string, topK = 5): Promise<KbHit[]> {
  const res = await fetch(`${GATEWAY}/api/v1/kb/search`, {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify({ kb, q, topK }),
  });

  // Read the body as text first. Error responses are not guaranteed to be
  // JSON, and parsing them eagerly would raise a SyntaxError that hides the
  // HTTP status from the caller.
  const raw = await res.text();

  if (!res.ok) {
    let detail: string | undefined;
    try {
      const parsed = JSON.parse(raw) as { error?: unknown } | null;
      if (typeof parsed?.error === "string") detail = parsed.error;
    } catch {
      // Body is not JSON; fall back to the status-based message below.
    }
    throw new Error(detail ?? `search failed: ${res.status}`);
  }

  const data = JSON.parse(raw) as { hits?: KbHit[]; error?: string } | null;
  return data?.hits ?? [];
}
|
||
|
||
/**
 * Stores one user preference via `PUT /api/v1/memory`
 * (per the original note: routed to mcp-go `memory_upsert`).
 *
 * @param id - Caller identity, forwarded in the `X-User-ID` / `X-Session-ID` headers.
 * @param key - Preference key, sent as the `key` field of the JSON body.
 * @param value - Preference value, sent as the `value` field of the JSON body.
 * @returns The `message` field of the response, or `"ok"` when it is absent.
 * @throws Error when the response status is not OK. The message is the
 *   response's `error` field when the body is JSON carrying one, otherwise
 *   `memory failed: <status>`.
 * @throws SyntaxError when a successful response body is not valid JSON.
 */
export async function setMemory(
  id: Identity,
  key: string,
  value: string,
): Promise<string> {
  const res = await fetch(`${GATEWAY}/api/v1/memory`, {
    method: "PUT",
    headers: {
      "Content-Type": "application/json",
      "X-User-ID": id.userId,
      "X-Session-ID": id.sessionId,
    },
    body: JSON.stringify({ key, value }),
  });

  // Read the body as text first. Error responses are not guaranteed to be
  // JSON, and parsing them eagerly would raise a SyntaxError that hides the
  // HTTP status from the caller.
  const raw = await res.text();

  if (!res.ok) {
    let detail: string | undefined;
    try {
      const parsed = JSON.parse(raw) as { error?: unknown } | null;
      if (typeof parsed?.error === "string") detail = parsed.error;
    } catch {
      // Body is not JSON; fall back to the status-based message below.
    }
    throw new Error(detail ?? `memory failed: ${res.status}`);
  }

  const data = JSON.parse(raw) as { message?: string; error?: string } | null;
  return data?.message ?? "ok";
}
|