85a5c2c1e7
检索从向量单路升级为混合:向量(Milvus) + 全文(Bleve BM25) → RRF 融合 → 可选 rerank(DashScope gte-rerank)。 - rag/bleve.go: Bleve 全文索引(内存,随 ingest 写入;kb 过滤);ingest 同步写 Milvus+Bleve - rag/fuse.go: RRF(Reciprocal Rank Fusion, k=60, 按文本去重)融合多路排序 - rag/rerank.go: DashScope gte-rerank 客户端(可选,env 配置,失败降级 RRF) - rag/rag.go: Search 改混合(向量+全文→RRF→可选rerank→topK);main 读 RERANK_* env - 验证: 全模块 build✓ + e2e PASS; live——入库写双索引;查'NATS'→全文精确命中#1+向量 →RRF NATS 排首(向量=4 全文=1);接 DashScope gte-rerank(百炼 key 有权限)→relevance score 0.19 真重排;retriever 节点端到端→DeepSeek 答 Milvus - 边界: Neo4j 图路(GraphRAG,需实体抽取)推迟;Bleve 内存索引重启重建;rerank 走 env (TODO 同 embedding 搬控制面 kind=rerank) Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
86 lines
2.4 KiB
Go
86 lines
2.4 KiB
Go
package rag
|
|
|
|
import (
|
|
"bytes"
|
|
"context"
|
|
"encoding/json"
|
|
"fmt"
|
|
"net/http"
|
|
"sort"
|
|
"time"
|
|
)
|
|
|
|
// rerankClient calls the DashScope text-rerank service (gte-rerank). Reranking
// is an optional stage: when the client is not configured it is skipped.
//
// The endpoint speaks DashScope's native format, not the OpenAI-compatible one:
//
//	POST {baseURL} {model, input:{query, documents}, parameters:{top_n, return_documents:false}}
//	resp {output:{results:[{index, relevance_score}]}}
type rerankClient struct {
	// Endpoint URL, API key (sent as a bearer token when non-empty) and model
	// name used for every rerank request.
	baseURL, apiKey, model string
	// hc is the HTTP client that performs the rerank requests.
	hc *http.Client
}
|
|
|
|
func newRerankClient(baseURL, apiKey, model string) *rerankClient {
|
|
if baseURL == "" || model == "" {
|
|
return nil
|
|
}
|
|
return &rerankClient{baseURL: baseURL, apiKey: apiKey, model: model, hc: &http.Client{Timeout: 20 * time.Second}}
|
|
}
|
|
|
|
// ready reports whether the client can be used: the receiver must be non-nil
// and have a non-empty baseURL. It is safe to call on a nil *rerankClient.
func (r *rerankClient) ready() bool {
	if r == nil {
		return false
	}
	return r.baseURL != ""
}
|
|
|
|
// rerank 用重排模型对候选重新打分排序,返回前 topN。出错时返回原序(降级)。
|
|
func (r *rerankClient) rerank(ctx context.Context, query string, hits []Hit, topN int) ([]Hit, error) {
|
|
docs := make([]string, len(hits))
|
|
for i, h := range hits {
|
|
docs[i] = h.Text
|
|
}
|
|
body, _ := json.Marshal(map[string]any{
|
|
"model": r.model,
|
|
"input": map[string]any{"query": query, "documents": docs},
|
|
"parameters": map[string]any{"top_n": topN, "return_documents": false},
|
|
})
|
|
req, err := http.NewRequestWithContext(ctx, http.MethodPost, r.baseURL, bytes.NewReader(body))
|
|
if err != nil {
|
|
return hits, err
|
|
}
|
|
req.Header.Set("Content-Type", "application/json")
|
|
if r.apiKey != "" {
|
|
req.Header.Set("Authorization", "Bearer "+r.apiKey)
|
|
}
|
|
resp, err := r.hc.Do(req)
|
|
if err != nil {
|
|
return hits, err
|
|
}
|
|
defer resp.Body.Close()
|
|
if resp.StatusCode >= 400 {
|
|
buf := new(bytes.Buffer)
|
|
_, _ = buf.ReadFrom(resp.Body)
|
|
return hits, fmt.Errorf("rerank http %d: %s", resp.StatusCode, buf.String())
|
|
}
|
|
var out struct {
|
|
Output struct {
|
|
Results []struct {
|
|
Index int `json:"index"`
|
|
RelevanceScore float32 `json:"relevance_score"`
|
|
} `json:"results"`
|
|
} `json:"output"`
|
|
}
|
|
if err := json.NewDecoder(resp.Body).Decode(&out); err != nil {
|
|
return hits, err
|
|
}
|
|
res := make([]Hit, 0, len(out.Output.Results))
|
|
for _, rr := range out.Output.Results {
|
|
if rr.Index >= 0 && rr.Index < len(hits) {
|
|
res = append(res, Hit{Text: hits[rr.Index].Text, Score: rr.RelevanceScore})
|
|
}
|
|
}
|
|
if len(res) == 0 {
|
|
return hits, nil
|
|
}
|
|
sort.Slice(res, func(i, j int) bool { return res[i].Score > res[j].Score })
|
|
return res, nil
|
|
}
|