94 lines
2.5 KiB
Go
94 lines
2.5 KiB
Go
package vector
|
|
|
|
import (
|
|
"context"
|
|
"fmt"
|
|
|
|
"engimind/internal/models"
|
|
)
|
|
|
|
// ContextChunk is a single retrieval hit: a piece of indexed text together
// with the metadata identifying where it came from.
type ContextChunk struct {
	// Text is the chunk's text content.
	Text string `json:"text"`
	// SourceID identifies the source the chunk belongs to.
	SourceID string `json:"sourceId"`
	// Score is the score attached to the hit.
	// NOTE(review): presumably the store's similarity score — confirm.
	Score float32 `json:"score"`
}
|
|
|
|
// RAGService orchestrates embedding + vector search for retrieval.
|
|
type RAGService struct {
|
|
embedding *EmbeddingService
|
|
store *QdrantStore
|
|
}
|
|
|
|
// NewRAGService creates a RAG service.
|
|
func NewRAGService(embedding *EmbeddingService, store *QdrantStore) *RAGService {
|
|
return &RAGService{embedding: embedding, store: store}
|
|
}
|
|
|
|
// CollectionName derives the Qdrant collection name for a project by
// prefixing projectID with "engimind_". The ID is used verbatim.
func CollectionName(projectID string) string {
	const format = "engimind_%s"
	name := fmt.Sprintf(format, projectID)
	return name
}
|
|
|
|
// IndexDocument chunks and indexes a parsed document.
|
|
func (s *RAGService) IndexDocument(ctx context.Context, projectID string, source models.SourceFile, content string, embeddingCfg EmbeddingConfig) error {
|
|
colName := CollectionName(projectID)
|
|
if err := s.store.EnsureCollection(ctx, colName); err != nil {
|
|
return err
|
|
}
|
|
|
|
textChunks := ChunkText(content, 500, 50)
|
|
var chunks []Chunk
|
|
for i, text := range textChunks {
|
|
vec, err := s.embedding.GetEmbedding(
|
|
text, embeddingCfg.BaseURL, embeddingCfg.Model,
|
|
embeddingCfg.APIKey, embeddingCfg.Provider,
|
|
)
|
|
if err != nil {
|
|
return fmt.Errorf("embed chunk %d: %w", i, err)
|
|
}
|
|
chunks = append(chunks, Chunk{
|
|
ID: fmt.Sprintf("%s-chunk-%d", source.ID, i),
|
|
SourceID: source.ID,
|
|
Text: text,
|
|
Vector: vec,
|
|
})
|
|
}
|
|
|
|
return s.store.Insert(ctx, colName, chunks)
|
|
}
|
|
|
|
// SearchContext retrieves relevant text chunks for a query.
|
|
func (s *RAGService) SearchContext(ctx context.Context, projectID, question string, topK int, embeddingCfg EmbeddingConfig) ([]ContextChunk, error) {
|
|
queryVec, err := s.embedding.GetEmbedding(
|
|
question, embeddingCfg.BaseURL, embeddingCfg.Model,
|
|
embeddingCfg.APIKey, embeddingCfg.Provider,
|
|
)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("embed query: %w", err)
|
|
}
|
|
|
|
colName := CollectionName(projectID)
|
|
results, err := s.store.Search(ctx, colName, queryVec, uint64(topK))
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
contextChunks := make([]ContextChunk, len(results))
|
|
for i, r := range results {
|
|
contextChunks[i] = ContextChunk{
|
|
Text: r.Text,
|
|
SourceID: r.SourceID,
|
|
}
|
|
}
|
|
return contextChunks, nil
|
|
}
|
|
|
|
// EmbeddingConfig bundles the settings handed to the embedding service on
// every GetEmbedding call made by RAGService.
type EmbeddingConfig struct {
	// BaseURL is the base URL passed to the embedding service.
	BaseURL string
	// Model is the model name passed to the embedding service.
	Model string
	// APIKey is the credential passed to the embedding service.
	APIKey string
	// Provider is the provider identifier passed to the embedding service.
	Provider string
}
|