Files
sundynix-agentix/sundynix-mcp-go/internal/rag/rerank.go
T
Blizzard 85a5c2c1e7 feat(rag): 混合检索融合 — Milvus 向量 + Bleve 全文 + RRF + DashScope rerank
检索从向量单路升级为混合:向量(Milvus) + 全文(Bleve BM25) → RRF 融合 →
可选 rerank(DashScope gte-rerank)。

- rag/bleve.go: Bleve 全文索引(内存,随 ingest 写入;kb 过滤);ingest 同步写 Milvus+Bleve
- rag/fuse.go: RRF(Reciprocal Rank Fusion, k=60, 按文本去重)融合多路排序
- rag/rerank.go: DashScope gte-rerank 客户端(可选,env 配置,失败降级 RRF)
- rag/rag.go: Search 改混合(向量+全文→RRF→可选rerank→topK);main 读 RERANK_* env
- 验证: 全模块 build✓ + e2e PASS; live——入库写双索引;查'NATS'→全文精确命中#1+向量
  →RRF NATS 排首(向量=4 全文=1);接 DashScope gte-rerank(百炼 key 有权限)→relevance
  score 0.19 真重排;retriever 节点端到端→DeepSeek 答 Milvus
- 边界: Neo4j 图路(GraphRAG,需实体抽取)推迟;Bleve 内存索引重启重建;rerank 走 env
  (TODO 同 embedding 搬控制面 kind=rerank)

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-11 09:53:37 +08:00

86 lines
2.4 KiB
Go

package rag
import (
	"bytes"
	"context"
	"encoding/json"
	"fmt"
	"io"
	"net/http"
	"sort"
	"time"
)
// rerankClient calls the DashScope text-rerank service (gte-rerank). Reranking
// is an optional stage: when it is not configured it is skipped.
//
// The wire format is DashScope's native one, not the OpenAI-compatible one:
//
//	POST {baseURL} {model, input:{query, documents}, parameters:{top_n, return_documents:false}}
//	resp {output:{results:[{index, relevance_score}]}}
type rerankClient struct {
	// baseURL is the endpoint requests are POSTed to; apiKey, when non-empty,
	// is sent as a Bearer token; model is the rerank model name put in the
	// request body.
	baseURL, apiKey, model string
	// hc is the HTTP client used to issue rerank requests.
	hc *http.Client
}
// newRerankClient builds a rerank client for the given endpoint, API key and
// model name. It returns nil when baseURL or model is empty, which marks the
// rerank stage as not configured. The client's HTTP requests use a 20-second
// timeout.
func newRerankClient(baseURL, apiKey, model string) *rerankClient {
	configured := baseURL != "" && model != ""
	if !configured {
		return nil
	}
	client := new(rerankClient)
	client.baseURL = baseURL
	client.apiKey = apiKey
	client.model = model
	client.hc = &http.Client{Timeout: 20 * time.Second}
	return client
}
// ready reports whether the client can be used: the receiver must be non-nil
// and have a non-empty baseURL. It is safe to call on a nil *rerankClient.
func (r *rerankClient) ready() bool {
	if r == nil {
		return false
	}
	return r.baseURL != ""
}
// rerank re-scores the candidate hits against query with the configured rerank
// model and returns them ordered by descending relevance score, capped at topN
// when topN is positive.
//
// The call is best-effort. On any failure (encoding or building the request,
// transport error, HTTP status >= 400, undecodable response) the original hits
// are returned unchanged together with the error, so the caller can fall back
// to the incoming order. A nil receiver, an empty hits slice, or a response
// that contains no usable result returns hits unchanged with a nil error.
func (r *rerankClient) rerank(ctx context.Context, query string, hits []Hit, topN int) ([]Hit, error) {
	// Without a client or without candidates there is nothing to rerank; skip
	// the network round trip instead of panicking or sending an empty request.
	if r == nil || len(hits) == 0 {
		return hits, nil
	}

	docs := make([]string, len(hits))
	for i := range hits {
		docs[i] = hits[i].Text
	}

	params := map[string]any{"return_documents": false}
	// A non-positive top_n is not a meaningful request value, so leave it out
	// and let the service apply its default.
	if topN > 0 {
		params["top_n"] = topN
	}
	body, err := json.Marshal(map[string]any{
		"model":      r.model,
		"input":      map[string]any{"query": query, "documents": docs},
		"parameters": params,
	})
	if err != nil {
		return hits, fmt.Errorf("rerank: encode request: %w", err)
	}

	req, err := http.NewRequestWithContext(ctx, http.MethodPost, r.baseURL, bytes.NewReader(body))
	if err != nil {
		return hits, fmt.Errorf("rerank: build request: %w", err)
	}
	req.Header.Set("Content-Type", "application/json")
	if r.apiKey != "" {
		req.Header.Set("Authorization", "Bearer "+r.apiKey)
	}

	resp, err := r.hc.Do(req)
	if err != nil {
		return hits, fmt.Errorf("rerank: send request: %w", err)
	}
	defer resp.Body.Close()

	if resp.StatusCode >= 400 {
		// Only a bounded prefix of the error body goes into the message so a
		// misbehaving endpoint cannot make us buffer an arbitrarily large reply.
		// The read error is ignored on purpose: the status code is the signal.
		snippet, _ := io.ReadAll(io.LimitReader(resp.Body, 4<<10))
		return hits, fmt.Errorf("rerank http %d: %s", resp.StatusCode, bytes.TrimSpace(snippet))
	}

	var out struct {
		Output struct {
			Results []struct {
				Index          int     `json:"index"`
				RelevanceScore float32 `json:"relevance_score"`
			} `json:"results"`
		} `json:"output"`
	}
	if err := json.NewDecoder(resp.Body).Decode(&out); err != nil {
		return hits, fmt.Errorf("rerank: decode response: %w", err)
	}

	reranked := make([]Hit, 0, len(out.Output.Results))
	seen := make([]bool, len(hits))
	for _, res := range out.Output.Results {
		// Ignore indexes that do not refer to a submitted document, and any
		// index the response repeats.
		if res.Index < 0 || res.Index >= len(hits) || seen[res.Index] {
			continue
		}
		seen[res.Index] = true
		// Copy the whole hit and only replace the score. Hit is declared
		// elsewhere; this keeps whatever fields it has besides Text and Score.
		h := hits[res.Index]
		h.Score = res.RelevanceScore
		reranked = append(reranked, h)
	}
	if len(reranked) == 0 {
		// Nothing usable came back; keep the incoming order.
		return hits, nil
	}

	// Stable sort keeps the service's order for equal scores, so the output
	// is deterministic.
	sort.SliceStable(reranked, func(i, j int) bool { return reranked[i].Score > reranked[j].Score })
	if topN > 0 && len(reranked) > topN {
		reranked = reranked[:topN]
	}
	return reranked, nil
}