feat(kb): 大文档正文存 MinIO(PG 只留元数据+预览+对象键)
超过阈值(8000 字)的正文落对象存储,彻底解决十几万字文件塞 PG 的问题。

- internal/blob:minio-go 封装 Store(Open/Put/Get/Delete + Ready 降级);连不上则降级内联。
- docker-compose:milvus-minio 暴露 9000 端口供网关用作文档对象存储(bucket sundynix-docs)。
- main/router/handler:注入 blob.Store(env MINIO_*,默认 localhost:9000 minioadmin)。
- runIngest:size>8000 且 MinIO 可用 → 正文 Put 到 owner/kb/name,PG content 置空仅存 object_key+preview+size;否则内联。SaveDoc 改为按全文显式传 preview(offload 后内联为空也有预览)。
- KbDoc:object_key 非空时从 MinIO 取回全文。

验证:入 12182 字笔记 → PG content_len=0、object_key=wt/default/超大文件测试、preview 非空、size=12182;/kb/doc 取回完整 12182 字(来自 MinIO);6321 字的仍内联(object_key 空)。列表只读元数据+预览。gateway build 通过。

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,75 @@
|
||||
// Package blob 封装对象存储(MinIO):大文档正文落对象存储,PG 只留元数据 + 预览 + 对象键。
|
||||
package blob
|
||||
|
||||
import (
	"bytes"
	"context"
	"errors"
	"io"
	"log"
	"time"

	"github.com/minio/minio-go/v7"
	"github.com/minio/minio-go/v7/pkg/credentials"
)
|
||||
|
||||
// Store 是对象存储句柄;cli 为 nil 表示降级(未连上 MinIO,大文档回退内联存 PG)。
|
||||
type Store struct {
|
||||
cli *minio.Client
|
||||
bucket string
|
||||
}
|
||||
|
||||
// Open 连接 MinIO 并确保 bucket 存在。连接失败返回降级实例(cli=nil),不阻断网关启动。
|
||||
func Open(endpoint, accessKey, secretKey, bucket string) *Store {
|
||||
cli, err := minio.New(endpoint, &minio.Options{
|
||||
Creds: credentials.NewStaticV4(accessKey, secretKey, ""),
|
||||
Secure: false,
|
||||
})
|
||||
if err != nil {
|
||||
log.Printf("[blob] MinIO 不可用,大文档回退内联: %v", err)
|
||||
return &Store{}
|
||||
}
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
|
||||
defer cancel()
|
||||
exists, err := cli.BucketExists(ctx, bucket)
|
||||
if err != nil {
|
||||
log.Printf("[blob] MinIO 连接失败,大文档回退内联: %v", err)
|
||||
return &Store{}
|
||||
}
|
||||
if !exists {
|
||||
if err := cli.MakeBucket(ctx, bucket, minio.MakeBucketOptions{}); err != nil {
|
||||
log.Printf("[blob] 建 bucket 失败,大文档回退内联: %v", err)
|
||||
return &Store{}
|
||||
}
|
||||
}
|
||||
log.Printf("[blob] MinIO connected %s bucket=%s", endpoint, bucket)
|
||||
return &Store{cli: cli, bucket: bucket}
|
||||
}
|
||||
|
||||
// Ready 报告对象存储是否可用。
|
||||
func (s *Store) Ready() bool { return s != nil && s.cli != nil }
|
||||
|
||||
// Put 写入一段文本到对象键 key。
|
||||
func (s *Store) Put(ctx context.Context, key, content string) error {
|
||||
r := bytes.NewReader([]byte(content))
|
||||
_, err := s.cli.PutObject(ctx, s.bucket, key, r, int64(len(content)), minio.PutObjectOptions{ContentType: "text/plain; charset=utf-8"})
|
||||
return err
|
||||
}
|
||||
|
||||
// Get 读回对象键 key 的全部文本。
|
||||
func (s *Store) Get(ctx context.Context, key string) (string, error) {
|
||||
obj, err := s.cli.GetObject(ctx, s.bucket, key, minio.GetObjectOptions{})
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
defer obj.Close()
|
||||
b, err := io.ReadAll(obj)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
return string(b), nil
|
||||
}
|
||||
|
||||
// Delete 删除对象(best-effort)。
|
||||
func (s *Store) Delete(ctx context.Context, key string) {
|
||||
_ = s.cli.RemoveObject(ctx, s.bucket, key, minio.RemoveObjectOptions{})
|
||||
}
|
||||
@@ -9,6 +9,7 @@ import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"log"
|
||||
"net/http"
|
||||
"path/filepath"
|
||||
"regexp"
|
||||
@@ -20,6 +21,9 @@ import (
|
||||
"github.com/sundynix/sundynix-shared/contract"
|
||||
)
|
||||
|
||||
// docInlineMax is the maximum document body length, counted in runes, that is
// stored inline in PG. Longer bodies are written to MinIO, and PG keeps only
// the metadata, a preview and the object key.
|
||||
const docInlineMax = 8000
|
||||
|
||||
// rawKB 规整知识库名(去空白,空则 default)—— 注册表里的展示名。
|
||||
func rawKB(kb string) string {
|
||||
kb = strings.TrimSpace(kb)
|
||||
@@ -139,7 +143,13 @@ func (h *Handler) KbDoc(c *gin.Context) {
|
||||
c.JSON(http.StatusNotFound, gin.H{"error": "文档不存在"})
|
||||
return
|
||||
}
|
||||
c.JSON(http.StatusOK, gin.H{"name": d.Name, "content": d.Content, "size": d.Size})
|
||||
content := d.Content
|
||||
if d.ObjectKey != "" && h.blob.Ready() { // 大文档:从 MinIO 取回正文
|
||||
if obj, oerr := h.blob.Get(c.Request.Context(), d.ObjectKey); oerr == nil {
|
||||
content = obj
|
||||
}
|
||||
}
|
||||
c.JSON(http.StatusOK, gin.H{"name": d.Name, "content": content, "size": d.Size})
|
||||
}
|
||||
|
||||
// KbLinks: GET /api/v1/kb/links?kb= —— 某库全部 [[双链]](from→to),供反链/笔记关系图。
|
||||
@@ -232,7 +242,18 @@ func (h *Handler) runIngest(job, owner, kbName, scoped, forceDoc, filename strin
|
||||
docName = noteName(text)
|
||||
}
|
||||
if text != "" {
|
||||
_ = h.db.SaveDoc(ctx, owner, kbName, docName, text, "", len([]rune(text)))
|
||||
size := len([]rune(text))
|
||||
inline, objectKey := text, ""
|
||||
// 大文档正文落对象存储,PG 只留元数据+预览+对象键(避免把十几万字塞进 PG)。
|
||||
if size > docInlineMax && h.blob.Ready() {
|
||||
key := owner + "/" + kbName + "/" + docName
|
||||
if err := h.blob.Put(ctx, key, text); err == nil {
|
||||
inline, objectKey = "", key
|
||||
} else {
|
||||
log.Printf("[gateway] 大文档转 MinIO 失败,回退内联: %v", err)
|
||||
}
|
||||
}
|
||||
_ = h.db.SaveDoc(ctx, owner, kbName, docName, inline, objectKey, size, head(text, 500))
|
||||
_ = h.db.ReplaceDocLinks(ctx, owner, kbName, docName, wikiLinks(text)) // 维护 [[双链]] 索引
|
||||
}
|
||||
|
||||
|
||||
@@ -11,6 +11,7 @@ import (
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
|
||||
"github.com/sundynix/sundynix-gateway/internal/blob"
|
||||
"github.com/sundynix/sundynix-gateway/internal/dsl"
|
||||
"github.com/sundynix/sundynix-gateway/internal/nats"
|
||||
"github.com/sundynix/sundynix-gateway/internal/store"
|
||||
@@ -21,10 +22,11 @@ type Handler struct {
|
||||
db *store.Postgres
|
||||
cache *store.Redis
|
||||
bus *nats.Bus
|
||||
blob *blob.Store
|
||||
}
|
||||
|
||||
func New(db *store.Postgres, cache *store.Redis, bus *nats.Bus) *Handler {
|
||||
return &Handler{db: db, cache: cache, bus: bus}
|
||||
func New(db *store.Postgres, cache *store.Redis, bus *nats.Bus, blob *blob.Store) *Handler {
|
||||
return &Handler{db: db, cache: cache, bus: bus, blob: blob}
|
||||
}
|
||||
|
||||
// SubmitTask: 解析客户端导出的 JSON DSL,组装为 Task,Publish 到 sundynix.tasks.*。
|
||||
|
||||
@@ -4,6 +4,7 @@ package router
|
||||
import (
|
||||
"github.com/gin-gonic/gin"
|
||||
|
||||
"github.com/sundynix/sundynix-gateway/internal/blob"
|
||||
"github.com/sundynix/sundynix-gateway/internal/handler"
|
||||
"github.com/sundynix/sundynix-gateway/internal/middleware"
|
||||
"github.com/sundynix/sundynix-gateway/internal/nats"
|
||||
@@ -11,13 +12,13 @@ import (
|
||||
)
|
||||
|
||||
// New 构建带有 Guardrail / 限流中间件的 Gin 引擎。
|
||||
func New(db *store.Postgres, cache *store.Redis, bus *nats.Bus) *gin.Engine {
|
||||
func New(db *store.Postgres, cache *store.Redis, bus *nats.Bus, blobStore *blob.Store) *gin.Engine {
|
||||
r := gin.Default()
|
||||
r.Use(cors()) // 桌面端/浏览器跨源访问(开发期放开)
|
||||
r.Use(middleware.RateLimit(cache))
|
||||
r.Use(middleware.Guardrail()) // Harness: Input/Output Guardrail
|
||||
|
||||
h := handler.New(db, cache, bus)
|
||||
h := handler.New(db, cache, bus, blobStore)
|
||||
api := r.Group("/api/v1")
|
||||
{
|
||||
api.POST("/tasks", h.SubmitTask) // 1. 解析 DSL 并 Publish 到 NATS
|
||||
|
||||
@@ -121,13 +121,11 @@ func docHead(s string, n int) string {
|
||||
}
|
||||
|
||||
// SaveDoc 写入/更新一份文档(owner+kb+name 唯一,重名覆盖)。
|
||||
// 同时维护 size 与 preview(列表只读它们,不拉全文)。content 入参为内联正文;
|
||||
// objectKey 非空表示正文已转 MinIO(此时 content 传空)。
|
||||
func (p *Postgres) SaveDoc(ctx context.Context, owner, kb, name, content, objectKey string, size int) error {
|
||||
// content 为内联正文(大文档转 MinIO 时传空 + objectKey);preview/size 由调用方按全文给出。
|
||||
func (p *Postgres) SaveDoc(ctx context.Context, owner, kb, name, content, objectKey string, size int, preview string) error {
|
||||
if p.db == nil {
|
||||
return nil
|
||||
}
|
||||
preview := docHead(content, 500)
|
||||
return p.db.WithContext(ctx).Clauses(clause.OnConflict{
|
||||
Columns: []clause.Column{{Name: "owner"}, {Name: "kb"}, {Name: "name"}},
|
||||
DoUpdates: clause.Assignments(map[string]any{"content": content, "object_key": objectKey, "size": size, "preview": preview, "updated_at": time.Now()}),
|
||||
|
||||
Reference in New Issue
Block a user