From e5bbe7318c69c3adcd4bd1c850e1a39ce1d70710 Mon Sep 17 00:00:00 2001 From: Blizzard Date: Wed, 10 Jun 2026 17:12:24 +0800 Subject: [PATCH] =?UTF-8?q?feat(rag):=20Milvus=20=E9=9B=86=E5=90=88?= =?UTF-8?q?=E7=BB=B4=E5=BA=A6=E8=87=AA=E9=80=82=E5=BA=94=20=E2=80=94=20?= =?UTF-8?q?=E5=88=87=E6=8D=A2=20embedding=20=E6=A8=A1=E5=9E=8B=E8=87=AA?= =?UTF-8?q?=E5=8A=A8=E9=87=8D=E5=BB=BA?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ensure() 检测已存集合的向量维度,与当前 embedding 维度不一致则 Drop+重建 (DescribeCollection 读 TypeParams[dim])。支持从 mock(256)切到真实 embedding(如 百炼 text-embedding-v3=1024 维)无需手动清库。 验证: 接阿里云百炼 text-embedding-v3(OpenAI 兼容)真实 embedding——集合自动 256→1024 重建,入库5条;语义检索(查询措辞与文档不同):'存储搜索向量的组件' →Milvus、'传消息的中间件'→NATS、'知识图谱存哪'→Neo4j,全部语义命中正确。 Co-Authored-By: Claude Opus 4.8 --- sundynix-mcp-go/internal/rag/milvus.go | 30 ++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/sundynix-mcp-go/internal/rag/milvus.go b/sundynix-mcp-go/internal/rag/milvus.go index 3ca1a1f..58d37b9 100644 --- a/sundynix-mcp-go/internal/rag/milvus.go +++ b/sundynix-mcp-go/internal/rag/milvus.go @@ -3,6 +3,8 @@ package rag import ( "context" "fmt" + "log" + "strconv" "sync" "github.com/milvus-io/milvus-sdk-go/v2/client" @@ -44,6 +46,18 @@ func (m *milvusStore) ensure(ctx context.Context, dim int) error { if err != nil { return err } + // 已存集合维度不一致(如切换 embedding 模型)→ 重建。 + if has { + if coll, derr := m.cli.DescribeCollection(ctx, collection); derr == nil { + if existing := vectorDim(coll); existing != 0 && existing != dim { + log.Printf("[rag] 集合维度 %d≠%d,重建 %s", existing, dim, collection) + if err := m.cli.DropCollection(ctx, collection); err != nil { + return fmt.Errorf("drop collection: %w", err) + } + has = false + } + } + } if !has { schema := entity.NewSchema().WithName(collection).WithDescription("sundynix wiki vectors"). WithField(entity.NewField().WithName("id").WithDataType(entity.FieldTypeInt64).WithIsPrimaryKey(true).WithIsAutoID(true)). 
@@ -88,6 +102,30 @@ func (m *milvusStore) insert(ctx context.Context, kb string, texts []string, vec
 	return m.cli.Flush(ctx, collection, false)
 }
 
+// vectorDim reports the vector dimension recorded in a collection schema so
+// that ensure can detect a change of embedding model.
+//
+// It scans coll.Schema.Fields for float-vector fields and returns the first
+// TypeParams["dim"] value that parses as a positive integer. It returns 0,
+// meaning "unknown", when coll or its schema is nil or when no such field is
+// found; the caller treats 0 as "do not rebuild".
+func vectorDim(coll *entity.Collection) int {
+	if coll == nil || coll.Schema == nil {
+		return 0
+	}
+	for _, f := range coll.Schema.Fields {
+		if f.DataType != entity.FieldTypeFloatVector {
+			continue
+		}
+		// A missing or malformed dim is skipped rather than returned as a
+		// bogus value: the result gates a destructive DropCollection.
+		if n, err := strconv.Atoi(f.TypeParams["dim"]); err == nil && n > 0 {
+			return n
+		}
+	}
+	return 0
+}
+
 // Hit 是一条检索结果。
 type Hit struct {
 	Text string