2d5fd2fca5
入库从同步改为异步流水线 + 进度回流(复用 token 流 NATS streaming)。 UI 实时看到 解析→切块→向量化(分批)→写入 各阶段 + 拆分块预览。 - shared: contract.IngestEvent(stage/done/total/chunks/error) - mcp-go: rag.Ingest 加 onProgress + 分批向量化(10/批)逐批回报;kb_ingest 带 job_id 把进度发到 sundynix.streams.<job_id> + CompleteStream - gateway: 入库异步返回 job_id,后台 runIngest 发进度;GET /kb/ingest/:id/stream SSE - frontend: streamIngest(EventSource);KbView 实时进度面板(阶段徽标+进度条+拆分列表) - 验证: build✓+e2e PASS; 浏览器 12 行→6 阶段点亮+进度条 12/12+拆分 12 块逐条 Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
211 lines
5.9 KiB
Go
211 lines
5.9 KiB
Go
package handler
|
||
|
||
import (
|
||
"context"
|
||
"crypto/rand"
|
||
"encoding/base64"
|
||
"encoding/hex"
|
||
"encoding/json"
|
||
"errors"
|
||
"io"
|
||
"net/http"
|
||
"path/filepath"
|
||
"strings"
|
||
"time"
|
||
|
||
"github.com/gin-gonic/gin"
|
||
|
||
"github.com/sundynix/sundynix-shared/contract"
|
||
)
|
||
|
||
// KbIngest: POST /api/v1/kb/ingest —— 文本入库(异步,返回 job_id;进度经 SSE 看)。
|
||
func (h *Handler) KbIngest(c *gin.Context) {
|
||
var body struct {
|
||
KB string `json:"kb"`
|
||
Text string `json:"text"`
|
||
}
|
||
if err := c.ShouldBindJSON(&body); err != nil || body.Text == "" {
|
||
c.JSON(http.StatusBadRequest, gin.H{"error": "text required"})
|
||
return
|
||
}
|
||
job := newJobID()
|
||
go h.runIngest(job, body.KB, "", nil, body.Text)
|
||
c.JSON(http.StatusAccepted, gin.H{"job_id": job})
|
||
}
|
||
|
||
// KbIngestFile: POST /api/v1/kb/ingest_file(multipart)—— 文件入库(异步,返回 job_id)。
|
||
// 流水线(解析→切块→向量化→写入)的进度经 sundynix.streams.<job_id> 回流,UI 用 SSE 看。
|
||
func (h *Handler) KbIngestFile(c *gin.Context) {
|
||
kb := c.PostForm("kb")
|
||
fh, err := c.FormFile("file")
|
||
if err != nil {
|
||
c.JSON(http.StatusBadRequest, gin.H{"error": "file required"})
|
||
return
|
||
}
|
||
f, err := fh.Open()
|
||
if err != nil {
|
||
c.JSON(http.StatusBadGateway, gin.H{"error": err.Error()})
|
||
return
|
||
}
|
||
defer f.Close()
|
||
data, err := io.ReadAll(f)
|
||
if err != nil {
|
||
c.JSON(http.StatusBadGateway, gin.H{"error": err.Error()})
|
||
return
|
||
}
|
||
job := newJobID()
|
||
go h.runIngest(job, kb, fh.Filename, data, "")
|
||
c.JSON(http.StatusAccepted, gin.H{"job_id": job, "file": fh.Filename})
|
||
}
|
||
|
||
// runIngest 后台跑入库流水线,逐阶段把进度发到 sundynix.streams.<job>。
|
||
// filename 非空表示文件入库(先经 mcp-py 解析);否则用 rawText。
|
||
func (h *Handler) runIngest(job, kb, filename string, data []byte, rawText string) {
|
||
ctx := context.Background()
|
||
emit := func(ev contract.IngestEvent) { _ = h.bus.PublishIngest(job, &ev) }
|
||
time.Sleep(400 * time.Millisecond) // 给 SSE 客户端订阅时间(core NATS 无缓冲)
|
||
|
||
text := rawText
|
||
if filename != "" {
|
||
emit(contract.IngestEvent{Stage: "解析", Msg: filename})
|
||
parsed, err := h.parseFile(ctx, filename, data)
|
||
if err != nil {
|
||
emit(contract.IngestEvent{Stage: "失败", Error: "解析失败: " + err.Error()})
|
||
_ = h.bus.CompleteStream(job)
|
||
return
|
||
}
|
||
emit(contract.IngestEvent{Stage: "解析完成", Msg: "解析出 " + itoa(len([]rune(parsed))) + " 字"})
|
||
text = parsed
|
||
}
|
||
|
||
// 调 mcp-go kb_ingest(带 job_id):它会发 切块/向量化/写入/完成 事件 + CompleteStream。
|
||
res, err := h.bus.CallTool(ctx, contract.ToolSubjectGo("kb_ingest"),
|
||
&contract.ToolCall{Tool: "kb_ingest", Args: map[string]any{"kb": kb, "text": text, "job_id": job}})
|
||
if err != nil || res == nil || !res.OK {
|
||
msg := "kb_ingest 失败"
|
||
if err != nil {
|
||
msg = err.Error()
|
||
} else if res != nil {
|
||
msg = res.Error
|
||
}
|
||
emit(contract.IngestEvent{Stage: "失败", Error: msg})
|
||
_ = h.bus.CompleteStream(job)
|
||
}
|
||
}
|
||
|
||
// KbIngestStream: GET /api/v1/kb/ingest/:id/stream —— SSE 实时推送入库进度事件。
|
||
func (h *Handler) KbIngestStream(c *gin.Context) {
|
||
job := c.Param("id")
|
||
c.Writer.Header().Set("Content-Type", "text/event-stream")
|
||
c.Writer.Header().Set("Cache-Control", "no-cache")
|
||
c.Writer.Header().Set("Connection", "keep-alive")
|
||
|
||
events := make(chan []byte, 64)
|
||
done := make(chan struct{})
|
||
unsub, err := h.bus.SubscribeTokens(job,
|
||
func(ev []byte) {
|
||
select {
|
||
case events <- ev:
|
||
default:
|
||
}
|
||
},
|
||
func() { close(done) },
|
||
)
|
||
if err != nil {
|
||
c.JSON(http.StatusBadGateway, gin.H{"error": err.Error()})
|
||
return
|
||
}
|
||
defer func() { _ = unsub() }()
|
||
|
||
c.Stream(func(w io.Writer) bool {
|
||
select {
|
||
case ev := <-events:
|
||
c.SSEvent("progress", string(ev))
|
||
return true
|
||
case <-done:
|
||
c.SSEvent("done", job)
|
||
return false
|
||
case <-c.Request.Context().Done():
|
||
return false
|
||
}
|
||
})
|
||
}
|
||
|
||
func newJobID() string {
|
||
var b [8]byte
|
||
_, _ = rand.Read(b[:])
|
||
return "ingest_" + hex.EncodeToString(b[:])
|
||
}
|
||
|
||
// itoa 简易整数转字符串(避免引入 strconv)。
|
||
func itoa(n int) string {
|
||
if n == 0 {
|
||
return "0"
|
||
}
|
||
neg := n < 0
|
||
if neg {
|
||
n = -n
|
||
}
|
||
var b []byte
|
||
for n > 0 {
|
||
b = append([]byte{byte('0' + n%10)}, b...)
|
||
n /= 10
|
||
}
|
||
if neg {
|
||
b = append([]byte{'-'}, b...)
|
||
}
|
||
return string(b)
|
||
}
|
||
|
||
// parseFile 把文件字节转为纯文本:文本类直读,其余经 mcp-py parse_document(算法层)。
|
||
func (h *Handler) parseFile(ctx context.Context, filename string, data []byte) (string, error) {
|
||
switch strings.ToLower(filepath.Ext(filename)) {
|
||
case ".txt", ".md", ".markdown", ".text":
|
||
return string(data), nil
|
||
}
|
||
res, err := h.bus.CallTool(ctx, contract.ToolSubjectPy("parse_document"),
|
||
&contract.ToolCall{Tool: "parse_document", Args: map[string]any{
|
||
"filename": filename, "content_b64": base64.StdEncoding.EncodeToString(data),
|
||
}})
|
||
if err != nil {
|
||
return "", err
|
||
}
|
||
if res == nil || !res.OK {
|
||
if res != nil && res.Error != "" {
|
||
return "", errors.New(res.Error)
|
||
}
|
||
return "", errors.New("parse_document 无响应(mcp-py 未运行?)")
|
||
}
|
||
return res.Content, nil
|
||
}
|
||
|
||
// KbSearch: POST /api/v1/kb/search —— 检索台:查某知识库,返回带分数的命中(→ mcp-go kb_search)。
|
||
func (h *Handler) KbSearch(c *gin.Context) {
|
||
var body struct {
|
||
KB string `json:"kb"`
|
||
Q string `json:"q"`
|
||
TopK int `json:"topK"`
|
||
}
|
||
if err := c.ShouldBindJSON(&body); err != nil || body.Q == "" {
|
||
c.JSON(http.StatusBadRequest, gin.H{"error": "q required"})
|
||
return
|
||
}
|
||
args := map[string]any{"kb": body.KB, "q": body.Q}
|
||
if body.TopK > 0 {
|
||
args["topK"] = body.TopK
|
||
}
|
||
res, err := h.bus.CallTool(c.Request.Context(), contract.ToolSubjectGo("kb_search"),
|
||
&contract.ToolCall{Tool: "kb_search", Args: args})
|
||
if err != nil {
|
||
c.JSON(http.StatusBadGateway, gin.H{"error": err.Error()})
|
||
return
|
||
}
|
||
if !res.OK {
|
||
c.JSON(http.StatusUnprocessableEntity, gin.H{"error": res.Error})
|
||
return
|
||
}
|
||
var hits []map[string]any
|
||
_ = json.Unmarshal([]byte(res.Content), &hits)
|
||
c.JSON(http.StatusOK, gin.H{"hits": hits})
|
||
}
|