init: initial commit

2026-04-07 17:35:09 +08:00
commit 680ecc320f
129 changed files with 10562 additions and 0 deletions
@@ -0,0 +1,33 @@
+package parser
+
+import (
+	"fmt"
+	"strings"
+)
+
+// CADParser is a placeholder parser for CAD drawings (.dwg / .dxf).
+// It does not read the input file: ParseToMarkdown returns fixed sample
+// content. A real implementation requires CGO/GDAL and is reserved for
+// future development.
+type CADParser struct{}
+
+// SupportedExtensions lists the file extensions that are routed to the CAD
+// stub parser.
+func (p *CADParser) SupportedExtensions() []string {
+	exts := make([]string, 0, 2)
+	exts = append(exts, ".dwg", ".dxf")
+	return exts
+}
+
+// ParseToMarkdown returns a simulated Markdown report for a CAD drawing.
+//
+// The file at path is never opened: path is only echoed into the report, and
+// the layer table and annotation summary are hard-coded sample values. The
+// returned error is always nil.
+func (p *CADParser) ParseToMarkdown(path string) (string, error) {
+	// Fixed report fragments, emitted in order; only the file line varies.
+	sections := []string{
+		"## CAD 图纸解析结果 (模拟)\n\n",
+		fmt.Sprintf("**文件**: %s\n\n", path),
+		"### 图层列表\n\n",
+		"| 图层名 | 类型 | 元素数 |\n",
+		"| --- | --- | --- |\n",
+		"| STR_MAIN | 结构主体 | 142 |\n",
+		"| DIM_TEXT | 标注文字 | 87 |\n",
+		"| SEAL_V3 | 密封层 | 23 |\n\n",
+		"### 标注摘要\n\n",
+		"- 预留缝宽度: 2.5mm\n",
+		"- 坐标基点: X=1240, Y=442\n",
+		"\n> ⚠️ 本解析为模拟结果，完整 CAD 解析需集成 GDAL/LibreDWG。\n",
+	}
+
+	var report strings.Builder
+	for _, section := range sections {
+		report.WriteString(section)
+	}
+	return report.String(), nil
+}
@@ -0,0 +1,62 @@
+package parser
+
+import (
+	"fmt"
+	"strings"
+
+	"github.com/xuri/excelize/v2"
+)
+
+// ExcelParser converts the worksheets of a workbook into Markdown tables,
+// using excelize to read the file. It carries no state.
+type ExcelParser struct{}
+
+// SupportedExtensions lists the workbook extensions routed to ExcelParser.
+//
+// NOTE(review): excelize targets the OOXML workbook formats; confirm that
+// legacy binary .xls files actually open before relying on that entry.
+func (p *ExcelParser) SupportedExtensions() []string {
+	exts := make([]string, 0, 2)
+	exts = append(exts, ".xlsx", ".xls")
+	return exts
+}
+
+// ParseToMarkdown renders each non-empty sheet of the workbook at path as a
+// Markdown table under a "## Sheet: <name>" heading. The first row of a sheet
+// is used as the table header.
+//
+// Every table is as wide as the widest row of its sheet, so data cells that
+// lie beyond the header width are kept; shorter rows are padded with empty
+// cells. Cell text is escaped so that "|" and line breaks inside a cell cannot
+// break the table layout.
+//
+// Reading is best-effort per sheet: a sheet whose rows cannot be read is
+// skipped. If nothing at all could be rendered and at least one sheet failed,
+// the first failure is returned instead of an empty result. An error is also
+// returned when the workbook itself cannot be opened.
+func (p *ExcelParser) ParseToMarkdown(path string) (string, error) {
+	f, err := excelize.OpenFile(path)
+	if err != nil {
+		return "", fmt.Errorf("open excel: %w", err)
+	}
+	defer f.Close()
+
+	var (
+		sb       strings.Builder
+		firstErr error
+	)
+	for _, sheet := range f.GetSheetList() {
+		rows, err := f.GetRows(sheet)
+		if err != nil {
+			// Remember the first failure so a fully unreadable workbook
+			// is reported rather than silently yielding "".
+			if firstErr == nil {
+				firstErr = fmt.Errorf("read sheet %q: %w", sheet, err)
+			}
+			continue
+		}
+
+		// Size the table by the widest row, not by the header row, so that
+		// no data cell is dropped.
+		width := 0
+		for _, row := range rows {
+			if len(row) > width {
+				width = len(row)
+			}
+		}
+		if width == 0 {
+			continue // no rows, or only empty rows
+		}
+
+		fmt.Fprintf(&sb, "## Sheet: %s\n\n", sheet)
+
+		// Header row followed by the Markdown separator row.
+		writeMarkdownRow(&sb, rows[0], width)
+		sb.WriteString("|")
+		for i := 0; i < width; i++ {
+			sb.WriteString(" --- |")
+		}
+		sb.WriteString("\n")
+
+		// Data rows.
+		for _, row := range rows[1:] {
+			writeMarkdownRow(&sb, row, width)
+		}
+		sb.WriteString("\n")
+	}
+
+	if sb.Len() == 0 && firstErr != nil {
+		return "", firstErr
+	}
+	return sb.String(), nil
+}
+
+// markdownCellEscaper rewrites the characters that would terminate a Markdown
+// table cell or row: pipes are backslash-escaped and line breaks become <br>.
+var markdownCellEscaper = strings.NewReplacer(
+	"|", `\|`,
+	"\r\n", "<br>",
+	"\n", "<br>",
+	"\r", "<br>",
+)
+
+// writeMarkdownRow appends one table row of exactly width cells to sb,
+// escaping cell text and padding missing trailing cells with empty ones.
+func writeMarkdownRow(sb *strings.Builder, cells []string, width int) {
+	sb.WriteString("| ")
+	for i := 0; i < width; i++ {
+		if i < len(cells) {
+			sb.WriteString(markdownCellEscaper.Replace(cells[i]))
+		}
+		sb.WriteString(" | ")
+	}
+	sb.WriteString("\n")
+}
@@ -0,0 +1,45 @@
+package parser
+
+import (
+	"fmt"
+	"path/filepath"
+	"strings"
+)
+
+// Parser is the interface implemented by every document parser that can be
+// placed in a Registry.
+type Parser interface {
+	// SupportedExtensions returns the file extensions (including the
+	// leading dot, e.g. ".pdf") that this parser handles.
+	SupportedExtensions() []string
+	// ParseToMarkdown converts the file at path into Markdown text.
+	ParseToMarkdown(path string) (string, error)
+}
+
+// Registry holds registered parsers keyed by extension.
+type Registry struct {
+	// parsers maps a lower-cased file extension (with leading dot) to the
+	// parser registered for it.
+	parsers map[string]Parser
+}
+
+// NewRegistry builds a Registry pre-populated with the built-in parsers
+// (Excel, PDF, CAD, Word), registered in that order.
+func NewRegistry() *Registry {
+	reg := &Registry{parsers: map[string]Parser{}}
+	builtins := []Parser{
+		&ExcelParser{},
+		&PDFParser{},
+		&CADParser{},
+		&WordParser{},
+	}
+	for _, builtin := range builtins {
+		reg.Register(builtin)
+	}
+	return reg
+}
+
+// Register associates p with each extension it reports. Extensions are
+// stored lower-cased, and a later registration for the same extension
+// replaces the earlier one.
+func (r *Registry) Register(p Parser) {
+	exts := p.SupportedExtensions()
+	for i := range exts {
+		key := strings.ToLower(exts[i])
+		r.parsers[key] = p
+	}
+}
+
+// Parse looks up the parser registered for the (case-insensitive) extension
+// of path and returns its Markdown output. An error is returned when no
+// parser is registered for that extension.
+func (r *Registry) Parse(path string) (string, error) {
+	ext := strings.ToLower(filepath.Ext(path))
+	if handler, found := r.parsers[ext]; found {
+		return handler.ParseToMarkdown(path)
+	}
+	return "", fmt.Errorf("unsupported file type: %s", ext)
+}
@@ -0,0 +1,76 @@
+package parser
+
+import (
+	"fmt"
+	"os"
+	"strings"
+
+	"github.com/ledongthuc/pdf"
+)
+
+// PDFParser extracts plain text from PDF files using the open-source
+// ledongthuc/pdf package. It carries no state.
+type PDFParser struct{}
+
+// SupportedExtensions lists the file extensions routed to PDFParser.
+func (p *PDFParser) SupportedExtensions() []string {
+	exts := make([]string, 0, 1)
+	exts = append(exts, ".pdf")
+	return exts
+}
+
+// ParseToMarkdown extracts the text of the PDF at path page by page and
+// returns it as Markdown, one "## Page N" section per page that yields
+// non-blank text.
+//
+// Extraction is best-effort per page: null pages and pages whose text cannot
+// be extracted are skipped. When no page yields text, readPDFPlainText is
+// tried as a fallback. If that also produces nothing but whitespace, an error
+// is returned; it wraps the first underlying failure (page extraction or
+// fallback) when there was one, so the cause is not lost.
+func (p *PDFParser) ParseToMarkdown(path string) (string, error) {
+	f, r, err := pdf.Open(path)
+	if err != nil {
+		return "", fmt.Errorf("open pdf: %w", err)
+	}
+	defer f.Close()
+
+	var (
+		sb       strings.Builder
+		firstErr error
+	)
+	totalPages := r.NumPage()
+
+	for i := 1; i <= totalPages; i++ {
+		page := r.Page(i)
+		if page.V.IsNull() {
+			continue
+		}
+		text, err := page.GetPlainText(nil)
+		if err != nil {
+			// Keep going, but remember why the first page failed.
+			if firstErr == nil {
+				firstErr = fmt.Errorf("page %d: %w", i, err)
+			}
+			continue
+		}
+		if content := strings.TrimSpace(text); content != "" {
+			fmt.Fprintf(&sb, "## Page %d\n\n%s\n\n", i, content)
+		}
+	}
+
+	if sb.Len() > 0 {
+		return sb.String(), nil
+	}
+
+	// Fallback: try reading the entire content at once. Whitespace-only
+	// output counts as empty, consistent with the per-page path above.
+	content, fallbackErr := readPDFPlainText(path)
+	if fallbackErr == nil && strings.TrimSpace(content) != "" {
+		return content, nil
+	}
+
+	cause := firstErr
+	if cause == nil {
+		cause = fallbackErr
+	}
+	if cause != nil {
+		return "", fmt.Errorf("no text content extracted from PDF: %w", cause)
+	}
+	return "", fmt.Errorf("no text content extracted from PDF")
+}
+
+// readPDFPlainText opens the PDF at path and returns the concatenated plain
+// text of all non-null pages, with no page headings or trimming.
+//
+// An error is returned when the file cannot be opened, its size cannot be
+// determined, or the PDF reader cannot be created. Per-page extraction errors
+// are deliberately ignored so that readable pages still contribute text.
+func readPDFPlainText(path string) (string, error) {
+	f, err := os.Open(path)
+	if err != nil {
+		return "", err
+	}
+	defer f.Close()
+
+	// The reader needs the file size; a failed Stat must not be ignored,
+	// otherwise stat would be nil and stat.Size() would panic.
+	stat, err := f.Stat()
+	if err != nil {
+		return "", fmt.Errorf("stat pdf: %w", err)
+	}
+	reader, err := pdf.NewReader(f, stat.Size())
+	if err != nil {
+		return "", err
+	}
+
+	var sb strings.Builder
+	totalPages := reader.NumPage()
+	for i := 1; i <= totalPages; i++ {
+		page := reader.Page(i)
+		if page.V.IsNull() {
+			continue
+		}
+		// Best effort: keep whatever text the page returned even if
+		// extraction reported an error.
+		text, _ := page.GetPlainText(nil)
+		sb.WriteString(text)
+	}
+	return sb.String(), nil
+}
@@ -0,0 +1,134 @@
+package parser
+
+import (
+	"fmt"
+	"log/slog"
+	"sync"
+
+	"github.com/wailsapp/wails/v3/pkg/application"
+)
+
+// Job represents a file parsing task.
+type Job struct {
+	// FilePath is the path handed to the registry for parsing.
+	FilePath string
+	// FileID and ProjectID are not interpreted by the queue; they travel
+	// with the job into its Result so the caller can correlate the outcome.
+	FileID    string
+	ProjectID string
+}
+
+// Result is the outcome of a parsing job.
+type Result struct {
+	// Job is the task this result belongs to.
+	Job Job
+	// Content is the Markdown returned by the registry for Job.FilePath.
+	Content string
+	// Err is the error returned by the registry, or nil on success.
+	Err error
+}
+
+// ProcessingQueue manages concurrent file parsing with a worker pool.
+type ProcessingQueue struct {
+	// registry performs the actual parsing for each job.
+	registry *Registry
+	// jobs carries submitted work to the worker goroutines.
+	jobs chan Job
+	// results carries finished work to the collector goroutine.
+	results chan Result
+	// concurrency is the number of worker goroutines started.
+	concurrency int
+	// wg counts jobs that were submitted and are not yet accounted for;
+	// Wait and Close block on it.
+	wg sync.WaitGroup
+	// onComplete, when non-nil, is called by the collector for each result.
+	onComplete func(Result)
+}
+
+// NewProcessingQueue creates a queue that parses files with registry using
+// concurrency worker goroutines, and starts those workers immediately.
+//
+// A concurrency below 1 is raised to 1: with no workers, submitted jobs would
+// never be processed and Wait/Close would block forever. onComplete may be
+// nil; when set it is invoked once for every finished job.
+func NewProcessingQueue(registry *Registry, concurrency int, onComplete func(Result)) *ProcessingQueue {
+	if concurrency < 1 {
+		concurrency = 1
+	}
+	q := &ProcessingQueue{
+		registry:    registry,
+		jobs:        make(chan Job, 100),
+		results:     make(chan Result, 100),
+		concurrency: concurrency,
+		onComplete:  onComplete,
+	}
+	q.start()
+	return q
+}
+
+// start launches the worker goroutines and the single collector goroutine
+// that hands results to onComplete. The collector runs until the results
+// channel is closed.
+func (q *ProcessingQueue) start() {
+	// Start workers.
+	for i := 0; i < q.concurrency; i++ {
+		go q.worker(i)
+	}
+	// Start result collector.
+	go func() {
+		for result := range q.results {
+			q.deliver(result)
+		}
+	}()
+}
+
+// deliver passes one result to onComplete (if set) and then marks its job as
+// done. Marking the job done only after the callback has returned means a
+// caller blocked on the WaitGroup is not released while callbacks for
+// submitted jobs are still pending.
+func (q *ProcessingQueue) deliver(result Result) {
+	defer q.wg.Done()
+	if q.onComplete != nil {
+		q.onComplete(result)
+	}
+}
+
+// worker parses jobs from the jobs channel until it is closed, forwarding
+// every outcome (including failures) to the results channel. id is used for
+// logging only.
+func (q *ProcessingQueue) worker(id int) {
+	for job := range q.jobs {
+		slog.Info("parser worker processing", "worker", id, "file", job.FilePath)
+		content, err := q.registry.Parse(job.FilePath)
+		if err != nil {
+			slog.Error("parse failed", "file", job.FilePath, "err", err)
+		}
+		// The job is marked done by the collector once the result has
+		// been delivered, not here.
+		q.results <- Result{Job: job, Content: content, Err: err}
+	}
+}
+
+// Submit adds a job to the processing queue.
+//
+// The job is counted on the WaitGroup before it is sent, so a concurrent Wait
+// cannot miss it. The send blocks while the jobs buffer is full. Submit must
+// not be called after Close: sending on the closed jobs channel panics.
+func (q *ProcessingQueue) Submit(job Job) {
+	q.wg.Add(1)
+	q.jobs <- job
+}
+
+// Wait blocks until every job counted by Submit has been marked done on the
+// queue's WaitGroup.
+func (q *ProcessingQueue) Wait() {
+	q.wg.Wait()
+}
+
+// Close shuts down the queue. The order matters: the jobs channel is closed
+// first so workers exit once it is drained, then Close waits for outstanding
+// jobs, and only then closes the results channel so no worker can send on a
+// closed channel. Close must be called at most once; a second call would
+// close an already-closed channel and panic.
+func (q *ProcessingQueue) Close() {
+	close(q.jobs)
+	q.wg.Wait()
+	close(q.results)
+}
+
+// ParseService wraps the parsing pipeline for Wails binding.
+type ParseService struct {
+	// registry dispatches files to the parser for their extension.
+	registry *Registry
+	// queue is not assigned by NewParseService and is not used by the
+	// methods in this file.
+	queue *ProcessingQueue
+}
+
+// NewParseService returns a ParseService backed by a fresh registry of the
+// built-in parsers. The service's queue is left unset.
+func NewParseService() *ParseService {
+	svc := new(ParseService)
+	svc.registry = NewRegistry()
+	return svc
+}
+
+// ParseFile parses the file at path synchronously and returns its Markdown.
+// Exposed for Wails binding. On failure the error is wrapped with the path
+// and an empty string is returned.
+func (s *ParseService) ParseFile(path string) (string, error) {
+	markdown, parseErr := s.registry.Parse(path)
+	if parseErr == nil {
+		return markdown, nil
+	}
+	return "", fmt.Errorf("parse %s: %w", path, parseErr)
+}
+
+// GetSupportedTypes returns the file extensions offered to the caller.
+// The list is hand-maintained here rather than derived from the registry, so
+// it must be updated whenever a parser is added or removed.
+func (s *ParseService) GetSupportedTypes() []string {
+	supported := make([]string, 0, 6)
+	supported = append(supported, ".xlsx", ".xls", ".pdf", ".dwg", ".dxf", ".docx")
+	return supported
+}
+
+// ParseDeliveryStandard shows an open-file dialog, parses the chosen document
+// and returns its Markdown. It returns an empty string and a nil error when
+// no file was selected, and a wrapped error when the dialog itself fails.
+func (s *ParseService) ParseDeliveryStandard() (string, error) {
+	picker := application.Get().Dialog.OpenFile()
+	picker.SetTitle("选择交付标准文件 (Delivery Standard)")
+	picker.AddFilter("Documents", "*.pdf;*.xlsx;*.xls;*.docx")
+
+	selected, dialogErr := picker.PromptForSingleSelection()
+	switch {
+	case dialogErr != nil:
+		return "", fmt.Errorf("open file dialog: %w", dialogErr)
+	case selected == "":
+		// An empty selection means the user cancelled the dialog.
+		return "", nil
+	}
+	return s.ParseFile(selected)
+}
@@ -0,0 +1,83 @@
+package parser
+
+import (
+	"archive/zip"
+	"encoding/xml"
+	"fmt"
+	"io"
+)
+
+// WordParser extracts paragraph text from .docx files by reading
+// word/document.xml out of the zip container. It carries no state.
+type WordParser struct{}
+
+// SupportedExtensions lists the file extensions routed to WordParser.
+func (p *WordParser) SupportedExtensions() []string {
+	exts := make([]string, 0, 1)
+	exts = append(exts, ".docx")
+	return exts
+}
+
+// wDocument mirrors the root element of word/document.xml; only its body
+// child element is decoded.
+type wDocument struct {
+	Body wBody `xml:"body"`
+}
+
+// wBody holds the paragraph ("p") elements that are direct children of the
+// document body. Paragraphs nested inside other elements are not captured by
+// this mapping.
+type wBody struct {
+	P []wP `xml:"p"`
+}
+
+// wP is one paragraph: the run ("r") elements that are its direct children.
+type wP struct {
+	R []wR `xml:"r"`
+}
+
+// wR is one run: the character data of each text ("t") element directly
+// inside it. Other run children are ignored.
+type wR struct {
+	T []string `xml:"t"`
+}
+
+// ParseToMarkdown extracts the text of the .docx file at path and returns it
+// as Markdown-like plain text: the text of each non-empty paragraph followed
+// by a blank line. No headings, lists or other formatting are reconstructed.
+//
+// An error is returned when the file is not a readable zip archive, when it
+// has no word/document.xml entry, or when that entry cannot be opened, read
+// or decoded. Errors are wrapped with the step that failed.
+func (p *WordParser) ParseToMarkdown(path string) (string, error) {
+	r, err := zip.OpenReader(path)
+	if err != nil {
+		return "", fmt.Errorf("failed to open docx as zip: %w", err)
+	}
+	defer r.Close()
+
+	// Locate the main document part inside the archive.
+	var docFile *zip.File
+	for _, f := range r.File {
+		if f.Name == "word/document.xml" {
+			docFile = f
+			break
+		}
+	}
+	if docFile == nil {
+		return "", fmt.Errorf("invalid docx file: word/document.xml not found")
+	}
+
+	rc, err := docFile.Open()
+	if err != nil {
+		return "", fmt.Errorf("open word/document.xml: %w", err)
+	}
+	defer rc.Close()
+
+	data, err := io.ReadAll(rc)
+	if err != nil {
+		return "", fmt.Errorf("read word/document.xml: %w", err)
+	}
+
+	var doc wDocument
+	if err := xml.Unmarshal(data, &doc); err != nil {
+		return "", fmt.Errorf("decode word/document.xml: %w", err)
+	}
+
+	// Accumulate into a byte buffer rather than with string +=, which would
+	// copy the whole result on every append.
+	var md []byte
+	for _, paragraph := range doc.Body.P {
+		start := len(md)
+		for _, run := range paragraph.R {
+			for _, text := range run.T {
+				md = append(md, text...)
+			}
+		}
+		// Only paragraphs that produced text get a trailing blank line.
+		if len(md) > start {
+			md = append(md, "\n\n"...)
+		}
+	}
+
+	return string(md), nil
+}