feat: 添加注释

2026-04-01 15:29:35 +08:00
parent aef2e152dc
commit 6162c9110c
28 changed files with 1293 additions and 298 deletions
@@ -1,3 +1,9 @@
+// Package service 提供 CSV 格式的知识库导入功能。
+//
+// CSV 被选为首要导入格式，原因：
+//  1. 轻量、无格式依赖，可用记事本/Excel/Numbers 等任意工具创建；
+//  2. Go 标准库 encoding/csv 原生支持，无需引入任何第三方依赖；
+//  3. 对于知识库数据（纯文本问答），CSV 已足够表达所有字段。
 package service

 import (
@@ -11,19 +17,41 @@ import (
 	"AI-Expert-Sidebar/internal/models"
 )

-// ImportResult summarises the outcome of a CSV import.
+// ImportResult 是导入操作的结果摘要，返回给前端显示 Toast 通知。
+// 同时用于 CSV 和 Excel 导入，两者共用此结构体。
 type ImportResult struct {
-	Imported int    `json:"imported"`
-	Skipped  int    `json:"skipped"`
-	Error    string `json:"error,omitempty"`
+	// Imported 是成功写入数据库的行数。
+	Imported int `json:"imported"`
+	// Skipped 是被跳过的行数（空行、缺字段、写入失败等）。
+	Skipped int `json:"skipped"`
+	// Error 非空时表示导入整体失败（文件不存在、格式错误等），
+	// 此时 Imported/Skipped 没有意义。
+	Error string `json:"error,omitempty"`
 }

-// ImportCSV reads a CSV file and inserts records into the active knowledge library.
+// ImportCSV 读取 CSV 文件并将合法行批量插入到当前活跃知识库。
 //
-// Required columns (case-insensitive): keyword, question, answer
-// Optional column: category (defaults to "通用")
+// # 期望的 CSV 格式
 //
-// The first row must be the header.
+// 第一行必须是表头（顺序任意，大小写无关）：
+//
+//	keyword,question,answer,category
+//	浇水频率,多肉多久浇一次水,10-14天一次,浇水
+//
+// required 列：keyword / question / answer（三者缺一则整批失败）
+// optional 列：category（缺失时默认为 "通用"）
+//
+// # 容错策略
+//
+// 单行解析失败（字段为空、列数不足）时只 skipped++，不中断整个导入。
+// 这样一份有 5% 脏数据的 CSV 依然能有效导入 95% 的正常数据，
+// 比"遇到错误立即中止"的方案用户体验好很多。
+//
+// # 为什么用流式读取（csv.Reader）而不是一次性读入内存
+//
+// 对于超大 CSV（数万条），一次性 io.ReadAll（旧称 ioutil.ReadAll，Go 1.16 起已弃用）
+// 会占用大量内存；csv.Reader 逐条记录读取，内存消耗大致恒定（约等于单条记录大小）。
+// 注意：单条写入失败时并不会中止导入，而是计入 skipped 后继续处理后续行。
 func ImportCSV(filePath string) ImportResult {
 	f, err := os.Open(filePath)
 	if err != nil {
@@ -33,14 +61,14 @@ func ImportCSV(filePath string) ImportResult {

 	db := database.Get()
 	if db == nil {
-		return ImportResult{Error: "知识库未初始化"}
+		return ImportResult{Error: "知识库未初始化，请先创建或选择知识库"}
 	}

 	r := csv.NewReader(f)
-	r.TrimLeadingSpace = true
-	r.LazyQuotes = true
+	r.TrimLeadingSpace = true // 仅忽略字段的前导空白（尾部空白不受此选项影响）
+	r.LazyQuotes = true       // 宽松解析：允许字段内出现未转义的引号

-	// Read and normalise header
+	// 读取并标准化表头行，构建列名→列序号的映射
 	header, err := r.Read()
 	if err != nil {
 		return ImportResult{Error: fmt.Sprintf("读取表头失败: %v", err)}
@@ -49,9 +77,10 @@ func ImportCSV(filePath string) ImportResult {
 	for i, h := range header {
 		colIdx[strings.ToLower(strings.TrimSpace(h))] = i
 	}
+	// 严格校验必需列是否存在并给出明确错误信息；否则 map 查不到键会返回 0，后续会悄悄读错第 0 列
 	for _, required := range []string{"keyword", "question", "answer"} {
 		if _, ok := colIdx[required]; !ok {
-			return ImportResult{Error: fmt.Sprintf("CSV 缺少必需列: %q (需要: keyword, question, answer)", required)}
+			return ImportResult{Error: fmt.Sprintf("CSV 缺少必需列: %q（需要: keyword, question, answer）", required)}
 		}
 	}
 	catIdx, hasCat := colIdx["category"]
@@ -63,12 +92,14 @@ func ImportCSV(filePath string) ImportResult {
 			break
 		}
 		if err != nil {
+			// 单行解析错误（如奇数引号）：跳过该行，继续下一行
 			skipped++
 			continue
 		}
 		keyword := strings.TrimSpace(row[colIdx["keyword"]])
 		question := strings.TrimSpace(row[colIdx["question"]])
 		answer := strings.TrimSpace(row[colIdx["answer"]])
+		// 三个核心字段任一为空则视为无效行
 		if keyword == "" || question == "" || answer == "" {
 			skipped++
 			continue
@@ -81,6 +112,7 @@ func ImportCSV(filePath string) ImportResult {
 		}
 		entry := models.Entry{Keyword: keyword, Question: question, Answer: answer, Category: cat}
 		if err := db.Create(&entry).Error; err != nil {
+			// 单条写入失败（如约束冲突）不影响其他行
 			skipped++
 		} else {
 			imported++