feat: 实时入库监控 + 向量拆分可视化(异步入库 + 进度 SSE)
入库从同步改为异步流水线 + 进度回流(复用 token 流 NATS streaming)。
UI 实时看到 解析→切块→向量化(分批)→写入 各阶段 + 拆分块预览。

- shared: contract.IngestEvent(stage/done/total/chunks/error)
- mcp-go: rag.Ingest 加 onProgress + 分批向量化(10/批)逐批回报;kb_ingest 带 job_id 把进度发到 sundynix.streams.<job_id> + CompleteStream
- gateway: 入库异步返回 job_id,后台 runIngest 发进度;GET /kb/ingest/:id/stream SSE
- frontend: streamIngest(EventSource);KbView 实时进度面板(阶段徽标+进度条+拆分列表)
- 验证: build✓+e2e PASS; 浏览器 12 行→6 阶段点亮+进度条 12/12+拆分 12 块逐条

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
@@ -2,21 +2,23 @@ package handler
|
||||
|
||||
import (
|
||||
"context"
|
||||
"crypto/rand"
|
||||
"encoding/base64"
|
||||
"encoding/hex"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"io"
|
||||
"net/http"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
|
||||
"github.com/sundynix/sundynix-shared/contract"
|
||||
)
|
||||
|
||||
// KbIngest: POST /api/v1/kb/ingest —— 把文本入库到知识库(→ mcp-go kb_ingest → 切块/embedding/Milvus)。
|
||||
// 供知识库管理页/脚本调用。
|
||||
// KbIngest: POST /api/v1/kb/ingest —— 文本入库(异步,返回 job_id;进度经 SSE 看)。
|
||||
func (h *Handler) KbIngest(c *gin.Context) {
|
||||
var body struct {
|
||||
KB string `json:"kb"`
|
||||
@@ -26,21 +28,13 @@ func (h *Handler) KbIngest(c *gin.Context) {
|
||||
c.JSON(http.StatusBadRequest, gin.H{"error": "text required"})
|
||||
return
|
||||
}
|
||||
res, err := h.bus.CallTool(c.Request.Context(), contract.ToolSubjectGo("kb_ingest"),
|
||||
&contract.ToolCall{Tool: "kb_ingest", Args: map[string]any{"kb": body.KB, "text": body.Text}})
|
||||
if err != nil {
|
||||
c.JSON(http.StatusBadGateway, gin.H{"error": err.Error()})
|
||||
return
|
||||
}
|
||||
if !res.OK {
|
||||
c.JSON(http.StatusUnprocessableEntity, gin.H{"error": res.Error})
|
||||
return
|
||||
}
|
||||
c.JSON(http.StatusOK, gin.H{"status": "ok", "message": res.Content})
|
||||
job := newJobID()
|
||||
go h.runIngest(job, body.KB, "", nil, body.Text)
|
||||
c.JSON(http.StatusAccepted, gin.H{"job_id": job})
|
||||
}
|
||||
|
||||
// KbIngestFile: POST /api/v1/kb/ingest_file(multipart)—— 上传文件入库。
|
||||
// 按类型路由:文本直读;docx/xlsx/pdf/csv → mcp-py parse_document 解析为文本 → kb_ingest。
|
||||
// KbIngestFile: POST /api/v1/kb/ingest_file(multipart)—— 文件入库(异步,返回 job_id)。
|
||||
// 流水线(解析→切块→向量化→写入)的进度经 sundynix.streams.<job_id> 回流,UI 用 SSE 看。
|
||||
func (h *Handler) KbIngestFile(c *gin.Context) {
|
||||
kb := c.PostForm("kb")
|
||||
fh, err := c.FormFile("file")
|
||||
@@ -59,22 +53,108 @@ func (h *Handler) KbIngestFile(c *gin.Context) {
|
||||
c.JSON(http.StatusBadGateway, gin.H{"error": err.Error()})
|
||||
return
|
||||
}
|
||||
text, err := h.parseFile(c.Request.Context(), fh.Filename, data)
|
||||
if err != nil {
|
||||
c.JSON(http.StatusUnprocessableEntity, gin.H{"error": "解析失败: " + err.Error()})
|
||||
return
|
||||
job := newJobID()
|
||||
go h.runIngest(job, kb, fh.Filename, data, "")
|
||||
c.JSON(http.StatusAccepted, gin.H{"job_id": job, "file": fh.Filename})
|
||||
}
|
||||
|
||||
// runIngest 后台跑入库流水线,逐阶段把进度发到 sundynix.streams.<job>。
|
||||
// filename 非空表示文件入库(先经 mcp-py 解析);否则用 rawText。
|
||||
func (h *Handler) runIngest(job, kb, filename string, data []byte, rawText string) {
|
||||
ctx := context.Background()
|
||||
emit := func(ev contract.IngestEvent) { _ = h.bus.PublishIngest(job, &ev) }
|
||||
time.Sleep(400 * time.Millisecond) // 给 SSE 客户端订阅时间(core NATS 无缓冲)
|
||||
|
||||
text := rawText
|
||||
if filename != "" {
|
||||
emit(contract.IngestEvent{Stage: "解析", Msg: filename})
|
||||
parsed, err := h.parseFile(ctx, filename, data)
|
||||
if err != nil {
|
||||
emit(contract.IngestEvent{Stage: "失败", Error: "解析失败: " + err.Error()})
|
||||
_ = h.bus.CompleteStream(job)
|
||||
return
|
||||
}
|
||||
emit(contract.IngestEvent{Stage: "解析完成", Msg: "解析出 " + itoa(len([]rune(parsed))) + " 字"})
|
||||
text = parsed
|
||||
}
|
||||
res, err := h.bus.CallTool(c.Request.Context(), contract.ToolSubjectGo("kb_ingest"),
|
||||
&contract.ToolCall{Tool: "kb_ingest", Args: map[string]any{"kb": kb, "text": text}})
|
||||
|
||||
// 调 mcp-go kb_ingest(带 job_id):它会发 切块/向量化/写入/完成 事件 + CompleteStream。
|
||||
res, err := h.bus.CallTool(ctx, contract.ToolSubjectGo("kb_ingest"),
|
||||
&contract.ToolCall{Tool: "kb_ingest", Args: map[string]any{"kb": kb, "text": text, "job_id": job}})
|
||||
if err != nil || res == nil || !res.OK {
|
||||
msg := "kb_ingest 失败"
|
||||
if err != nil {
|
||||
msg = err.Error()
|
||||
} else if res != nil {
|
||||
msg = res.Error
|
||||
}
|
||||
emit(contract.IngestEvent{Stage: "失败", Error: msg})
|
||||
_ = h.bus.CompleteStream(job)
|
||||
}
|
||||
}
|
||||
|
||||
// KbIngestStream: GET /api/v1/kb/ingest/:id/stream —— SSE 实时推送入库进度事件。
|
||||
func (h *Handler) KbIngestStream(c *gin.Context) {
|
||||
job := c.Param("id")
|
||||
c.Writer.Header().Set("Content-Type", "text/event-stream")
|
||||
c.Writer.Header().Set("Cache-Control", "no-cache")
|
||||
c.Writer.Header().Set("Connection", "keep-alive")
|
||||
|
||||
events := make(chan []byte, 64)
|
||||
done := make(chan struct{})
|
||||
unsub, err := h.bus.SubscribeTokens(job,
|
||||
func(ev []byte) {
|
||||
select {
|
||||
case events <- ev:
|
||||
default:
|
||||
}
|
||||
},
|
||||
func() { close(done) },
|
||||
)
|
||||
if err != nil {
|
||||
c.JSON(http.StatusBadGateway, gin.H{"error": err.Error()})
|
||||
return
|
||||
}
|
||||
if !res.OK {
|
||||
c.JSON(http.StatusUnprocessableEntity, gin.H{"error": res.Error})
|
||||
return
|
||||
defer func() { _ = unsub() }()
|
||||
|
||||
c.Stream(func(w io.Writer) bool {
|
||||
select {
|
||||
case ev := <-events:
|
||||
c.SSEvent("progress", string(ev))
|
||||
return true
|
||||
case <-done:
|
||||
c.SSEvent("done", job)
|
||||
return false
|
||||
case <-c.Request.Context().Done():
|
||||
return false
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
// newJobID returns a fresh ingest job identifier of the form "ingest_" followed
// by 16 lowercase hex characters (8 random bytes).
//
// If the system random source reports an error, the bytes are derived from the
// current time in nanoseconds instead, so the id is never the constant
// all-zero value that an ignored error would produce.
func newJobID() string {
	var b [8]byte
	if _, err := rand.Read(b[:]); err != nil {
		ns := uint64(time.Now().UnixNano())
		for i := range b {
			b[i] = byte(ns >> (8 * (7 - i)))
		}
	}
	return "ingest_" + hex.EncodeToString(b[:])
}
|
||||
|
||||
// itoa converts n to its base-10 string form (a small local helper kept to
// avoid importing strconv).
//
// Negative values get a leading '-'. The magnitude is computed in unsigned
// arithmetic so the minimum int, whose negation overflows int, is formatted
// correctly.
func itoa(n int) string {
	if n == 0 {
		return "0"
	}
	neg := n < 0
	u := uint(n)
	if neg {
		u = -u // two's-complement negation yields the magnitude, even for the minimum int
	}

	// 20 bytes hold a sign plus the 19 digits of the largest 64-bit magnitude.
	var buf [20]byte
	i := len(buf)
	// Fill from the end so no per-digit reallocation or shifting is needed.
	for u > 0 {
		i--
		buf[i] = byte('0' + u%10)
		u /= 10
	}
	if neg {
		i--
		buf[i] = '-'
	}
	return string(buf[i:])
}
|
||||
|
||||
// parseFile 把文件字节转为纯文本:文本类直读,其余经 mcp-py parse_document(算法层)。
|
||||
|
||||
@@ -3,6 +3,7 @@ package nats
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"log"
|
||||
|
||||
sharedbus "github.com/sundynix/sundynix-shared/bus"
|
||||
@@ -58,4 +59,16 @@ func (b *Bus) PublishConfigUpdated(kind string, cfg *contract.ModelConfig) error
|
||||
return b.inner.PublishConfigUpdated(kind, cfg)
|
||||
}
|
||||
|
||||
// PublishIngest 把一条入库进度事件发到 sundynix.streams.<jobID>。
|
||||
func (b *Bus) PublishIngest(jobID string, ev *contract.IngestEvent) error {
|
||||
data, err := json.Marshal(ev)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return b.inner.PublishToken(jobID, data)
|
||||
}
|
||||
|
||||
// CompleteStream 发送入库流结束信号。
|
||||
func (b *Bus) CompleteStream(jobID string) error { return b.inner.CompleteStream(jobID) }
|
||||
|
||||
// Close closes the wrapped bus.
func (b *Bus) Close() {
	b.inner.Close()
}
|
||||
|
||||
@@ -24,8 +24,9 @@ func New(db *store.Postgres, cache *store.Redis, bus *nats.Bus) *gin.Engine {
|
||||
api.GET("/tasks/:id/stream", h.StreamTask) // 4. SSE/WS 回流 Token Stream
|
||||
api.PUT("/memory", h.SetMemory) // 偏好记忆登记(→ mcp-go memory_upsert)
|
||||
api.POST("/kb/ingest", h.KbIngest) // 知识库入库(文本,→ mcp-go kb_ingest)
|
||||
api.POST("/kb/ingest_file", h.KbIngestFile) // 文件入库(docx/xlsx/pdf… → mcp-py 解析 → 入库)
|
||||
api.POST("/kb/search", h.KbSearch) // 知识库检索台(→ mcp-go kb_search)
|
||||
api.POST("/kb/ingest_file", h.KbIngestFile) // 文件入库(docx/xlsx/pdf… 异步)
|
||||
api.GET("/kb/ingest/:id/stream", h.KbIngestStream) // 入库进度 SSE(实时监控)
|
||||
api.POST("/kb/search", h.KbSearch) // 知识库检索台(→ mcp-go kb_search)
|
||||
api.GET("/billing", h.Billing)
|
||||
|
||||
// 运维控制面:LLM 模型配置(独立运维控制台调用)。
|
||||
|
||||
Reference in New Issue
Block a user