init: initial commit

This commit is contained in:
Blizzard
2026-04-07 17:35:09 +08:00
commit 680ecc320f
129 changed files with 10562 additions and 0 deletions
+33
View File
@@ -0,0 +1,33 @@
package parser
import (
"fmt"
"strings"
)
// CADParser is a stub Parser for CAD drawings (.dwg / .dxf).
// It never reads the input file: its ParseToMarkdown returns fixed,
// simulated content. A real implementation requires CGO/GDAL and is
// reserved for future development.
type CADParser struct{}
// SupportedExtensions reports the file extensions (with leading dot) that
// CADParser claims: ".dwg" and ".dxf".
func (p *CADParser) SupportedExtensions() []string {
	exts := make([]string, 0, 2)
	exts = append(exts, ".dwg", ".dxf")
	return exts
}
// ParseToMarkdown returns a fixed, simulated Markdown report for a CAD
// drawing. The file at path is never opened; path is only echoed into the
// report. The returned error is always nil.
func (p *CADParser) ParseToMarkdown(path string) (string, error) {
	// Everything after the file line is canned sample data (layer table,
	// annotation summary and the "simulated result" notice).
	sample := [...]string{
		"### 图层列表\n\n",
		"| 图层名 | 类型 | 元素数 |\n",
		"| --- | --- | --- |\n",
		"| STR_MAIN | 结构主体 | 142 |\n",
		"| DIM_TEXT | 标注文字 | 87 |\n",
		"| SEAL_V3 | 密封层 | 23 |\n\n",
		"### 标注摘要\n\n",
		"- 预留缝宽度: 2.5mm\n",
		"- 坐标基点: X=1240, Y=442\n",
		"\n> ⚠️ 本解析为模拟结果,完整 CAD 解析需集成 GDAL/LibreDWG。\n",
	}

	var report strings.Builder
	report.WriteString("## CAD 图纸解析结果 (模拟)\n\n")
	fmt.Fprintf(&report, "**文件**: %s\n\n", path)
	for _, chunk := range sample {
		report.WriteString(chunk)
	}
	return report.String(), nil
}
+62
View File
@@ -0,0 +1,62 @@
package parser
import (
"fmt"
"strings"
"github.com/xuri/excelize/v2"
)
// ExcelParser is the Parser for Excel workbooks. Its ParseToMarkdown opens
// the workbook with excelize and renders each non-empty sheet as a
// Markdown table.
type ExcelParser struct{}
// SupportedExtensions reports the file extensions (with leading dot) that
// ExcelParser claims: ".xlsx" and ".xls".
//
// NOTE(review): ParseToMarkdown opens files with excelize, which documents
// support for the XLSX family of formats; ".xls" files will presumably
// fail at open time — confirm before relying on it.
func (p *ExcelParser) SupportedExtensions() []string {
	const (
		xlsx = ".xlsx"
		xls  = ".xls"
	)
	return []string{xlsx, xls}
}
// markdownCellEscaper rewrites the characters that would break a Markdown
// table row when they appear inside a cell value: a literal pipe ends the
// cell early and a line break ends the row.
var markdownCellEscaper = strings.NewReplacer(
	"|", "\\|",
	"\r\n", "<br>",
	"\n", "<br>",
	"\r", "<br>",
)

// writeMarkdownRow appends one table row of exactly width cells to sb,
// padding short rows with empty cells and escaping every value.
func writeMarkdownRow(sb *strings.Builder, row []string, width int) {
	sb.WriteString("| ")
	for i := 0; i < width; i++ {
		if i < len(row) {
			sb.WriteString(markdownCellEscaper.Replace(row[i]))
		}
		sb.WriteString(" | ")
	}
	sb.WriteString("\n")
}

// ParseToMarkdown opens the workbook at path and renders every sheet that
// contains data as a "## Sheet: <name>" heading followed by a Markdown
// table. The first row of each sheet is used as the table header.
//
// It returns an error only when the workbook cannot be opened. Sheets whose
// rows cannot be read, and sheets without any cell values, are skipped
// (best effort), so the result may be an empty string.
func (p *ExcelParser) ParseToMarkdown(path string) (string, error) {
	f, err := excelize.OpenFile(path)
	if err != nil {
		return "", fmt.Errorf("open excel: %w", err)
	}
	defer f.Close()

	var sb strings.Builder
	for _, sheet := range f.GetSheetList() {
		rows, err := f.GetRows(sheet)
		if err != nil {
			// Best effort: one unreadable sheet must not hide the others.
			continue
		}

		// Size the table by the widest row rather than by the header row,
		// so that cells beyond the header width are not dropped and a
		// blank first row does not empty the whole table.
		width := 0
		for _, row := range rows {
			if len(row) > width {
				width = len(row)
			}
		}
		if width == 0 {
			// No rows, or only blank rows: nothing to render.
			continue
		}

		fmt.Fprintf(&sb, "## Sheet: %s\n\n", sheet)

		// Header row followed by the mandatory separator row.
		writeMarkdownRow(&sb, rows[0], width)
		sb.WriteString("|")
		for i := 0; i < width; i++ {
			sb.WriteString(" --- |")
		}
		sb.WriteString("\n")

		// Data rows.
		for _, row := range rows[1:] {
			writeMarkdownRow(&sb, row, width)
		}
		sb.WriteString("\n")
	}
	return sb.String(), nil
}
+45
View File
@@ -0,0 +1,45 @@
package parser
import (
"fmt"
"path/filepath"
"strings"
)
// Parser is the interface implemented by every document parser in this
// package. Registry uses it to map file extensions to implementations.
type Parser interface {
// SupportedExtensions returns the file extensions the parser handles.
// Registry lower-cases them and uses them as lookup keys against
// filepath.Ext output, so they are expected to include the leading dot.
SupportedExtensions() []string
// ParseToMarkdown converts the file at path into Markdown text, or
// returns an error if the file cannot be processed.
ParseToMarkdown(path string) (string, error)
}
// Registry holds registered parsers keyed by extension.
// Keys are stored lower-cased (see Register). The map is not guarded by
// any lock in this type.
type Registry struct {
// parsers maps a lower-cased file extension to the parser handling it.
parsers map[string]Parser
}
// NewRegistry builds a Registry pre-populated with the built-in parsers.
// They are registered in the order Excel, PDF, CAD, Word; if two parsers
// claimed the same extension, the later one would win.
func NewRegistry() *Registry {
	reg := &Registry{parsers: make(map[string]Parser)}
	builtins := []Parser{
		&ExcelParser{},
		&PDFParser{},
		&CADParser{},
		&WordParser{},
	}
	for _, builtin := range builtins {
		reg.Register(builtin)
	}
	return reg
}
// Register maps every extension reported by p to p. Extensions are
// lower-cased before being stored, and an existing entry for the same
// extension is overwritten.
func (r *Registry) Register(p Parser) {
	exts := p.SupportedExtensions()
	for i := range exts {
		key := strings.ToLower(exts[i])
		r.parsers[key] = p
	}
}
// Parse looks up the parser registered for path's extension (compared
// case-insensitively) and returns whatever its ParseToMarkdown returns.
// If no parser is registered for the extension, it returns an
// "unsupported file type" error naming the lower-cased extension.
func (r *Registry) Parse(path string) (string, error) {
	key := strings.ToLower(filepath.Ext(path))
	if handler, found := r.parsers[key]; found {
		return handler.ParseToMarkdown(path)
	}
	return "", fmt.Errorf("unsupported file type: %s", key)
}
+76
View File
@@ -0,0 +1,76 @@
package parser
import (
"fmt"
"os"
"strings"
"github.com/ledongthuc/pdf"
)
// PDFParser is the Parser for PDF files. It extracts plain text page by
// page using the open-source github.com/ledongthuc/pdf package.
type PDFParser struct{}
// SupportedExtensions reports the single file extension (with leading dot)
// that PDFParser claims: ".pdf".
func (p *PDFParser) SupportedExtensions() []string {
	exts := [...]string{".pdf"}
	return exts[:]
}
// ParseToMarkdown extracts the plain text of the PDF at path and returns it
// as Markdown, one "## Page N" section per page that yields non-blank text.
// Null pages and pages whose text extraction fails are skipped.
//
// If no page produced a section, it falls back to readPDFPlainText and
// returns that text unformatted when it is non-empty. It returns an error
// when the file cannot be opened or when no text could be extracted at all.
func (p *PDFParser) ParseToMarkdown(path string) (string, error) {
	file, reader, err := pdf.Open(path)
	if err != nil {
		return "", fmt.Errorf("open pdf: %w", err)
	}
	defer file.Close()

	var out strings.Builder
	for n, last := 1, reader.NumPage(); n <= last; n++ {
		pg := reader.Page(n)
		if pg.V.IsNull() {
			continue
		}
		raw, err := pg.GetPlainText(nil)
		if err != nil {
			continue
		}
		if body := strings.TrimSpace(raw); body != "" {
			fmt.Fprintf(&out, "## Page %d\n\n%s\n\n", n, body)
		}
	}

	if out.Len() > 0 {
		return out.String(), nil
	}

	// Nothing extracted per page: retry by concatenating the raw page text.
	if whole, err := readPDFPlainText(path); err == nil && whole != "" {
		return whole, nil
	}
	return "", fmt.Errorf("no text content extracted from PDF")
}
// readPDFPlainText opens the PDF at path and returns the plain text of all
// its pages concatenated, without trimming or page headings.
//
// It returns an error when the file cannot be opened, when its size cannot
// be determined, or when the PDF reader cannot be created. Null pages and
// pages whose text extraction fails are skipped.
func readPDFPlainText(path string) (string, error) {
	f, err := os.Open(path)
	if err != nil {
		return "", err
	}
	defer f.Close()

	// The size is required by pdf.NewReader. Stat returns a nil FileInfo on
	// failure, so the error must be checked before calling Size.
	stat, err := f.Stat()
	if err != nil {
		return "", err
	}

	reader, err := pdf.NewReader(f, stat.Size())
	if err != nil {
		return "", err
	}

	var sb strings.Builder
	totalPages := reader.NumPage()
	for i := 1; i <= totalPages; i++ {
		page := reader.Page(i)
		if page.V.IsNull() {
			continue
		}
		text, err := page.GetPlainText(nil)
		if err != nil {
			// Best effort: a page that cannot be decoded must not discard
			// the text of the other pages.
			continue
		}
		sb.WriteString(text)
	}
	return sb.String(), nil
}
+134
View File
@@ -0,0 +1,134 @@
package parser
import (
"fmt"
"log/slog"
"sync"
"github.com/wailsapp/wails/v3/pkg/application"
)
// Job represents a file parsing task.
type Job struct {
// FilePath is the path handed to Registry.Parse by the worker.
FilePath string
// FileID and ProjectID are not read by the queue itself; they travel
// unchanged inside Result.Job so the completion callback can use them.
FileID string
ProjectID string
}
// Result is the outcome of a parsing job.
type Result struct {
// Job is the job this result belongs to.
Job Job
// Content is the string returned by Registry.Parse for Job.FilePath.
Content string
// Err is the error returned by Registry.Parse, or nil on success.
Err error
}
// ProcessingQueue manages concurrent file parsing with a worker pool.
// Worker goroutines receive jobs from the jobs channel, parse them through
// the registry and send a Result to the results channel; a separate
// collector goroutine passes each Result to onComplete.
type ProcessingQueue struct {
// registry performs the actual parsing.
registry *Registry
// jobs carries submitted jobs to the workers.
jobs chan Job
// results carries finished results to the collector goroutine.
results chan Result
// concurrency is the number of worker goroutines launched by start.
concurrency int
// wg counts jobs that were submitted but whose result has not yet been
// sent to results (Add in Submit, Done in worker).
wg sync.WaitGroup
// onComplete, if non-nil, is called by the collector for every Result.
onComplete func(Result)
}
// NewProcessingQueue creates a queue that parses submitted jobs through
// registry on concurrency worker goroutines, and starts it immediately.
//
// A concurrency below 1 is raised to 1: with no workers nothing would ever
// drain the jobs channel, so Submit would block once the buffer filled and
// Wait would never return. onComplete may be nil; when set, it is invoked
// for every Result by the queue's collector goroutine.
//
// Both the jobs and the results channel are buffered with capacity 100.
func NewProcessingQueue(registry *Registry, concurrency int, onComplete func(Result)) *ProcessingQueue {
	if concurrency < 1 {
		concurrency = 1
	}
	q := &ProcessingQueue{
		registry:    registry,
		jobs:        make(chan Job, 100),
		results:     make(chan Result, 100),
		concurrency: concurrency,
		onComplete:  onComplete,
	}
	q.start()
	return q
}
// start launches q.concurrency worker goroutines (with ids 0..n-1) and one
// collector goroutine. The collector hands every Result received from
// q.results to q.onComplete when that callback is set, and exits once
// q.results is closed.
func (q *ProcessingQueue) start() {
	// Worker pool.
	for id := 0; id < q.concurrency; id++ {
		go q.worker(id)
	}

	// Result collector.
	go func() {
		for {
			res, open := <-q.results
			if !open {
				return
			}
			if q.onComplete == nil {
				continue
			}
			q.onComplete(res)
		}
	}()
}
// worker consumes jobs from q.jobs until that channel is closed. Each job
// is parsed through the registry; the outcome (content and error alike) is
// sent to q.results, and only then is the job marked done on the
// WaitGroup. Parse failures are logged but do not stop the worker.
func (q *ProcessingQueue) worker(id int) {
	for {
		task, more := <-q.jobs
		if !more {
			return
		}
		slog.Info("parser worker processing", "worker", id, "file", task.FilePath)

		res := Result{Job: task}
		res.Content, res.Err = q.registry.Parse(task.FilePath)
		if res.Err != nil {
			slog.Error("parse failed", "file", task.FilePath, "err", res.Err)
		}

		q.results <- res
		q.wg.Done()
	}
}
// Submit adds a job to the processing queue.
// The send blocks while the jobs channel buffer is full.
func (q *ProcessingQueue) Submit(job Job) {
// Count the job before queuing it so that Wait already accounts for it;
// the worker calls Done after sending the job's Result.
q.wg.Add(1)
q.jobs <- job
}
// Wait blocks until every submitted job has been parsed and its Result has
// been sent to the results channel. Because onComplete runs on a separate
// collector goroutine, the callback for the last results may not have run
// yet when Wait returns.
func (q *ProcessingQueue) Wait() {
q.wg.Wait()
}
// Close shuts down the queue: it stops accepting jobs, waits for the jobs
// already submitted to finish, then stops the result collector.
// Submit must not be called after Close (it would send on a closed
// channel), and Close must be called at most once.
func (q *ProcessingQueue) Close() {
// No more jobs: lets every worker's range loop over q.jobs terminate.
close(q.jobs)
// Wait until each submitted job has had its Result sent to q.results.
q.wg.Wait()
// All sends are finished, so closing is safe; this ends the collector loop.
close(q.results)
}
// ParseService wraps the parsing pipeline for Wails binding.
type ParseService struct {
// registry resolves and runs the parser for a given file.
registry *Registry
// queue is not assigned by NewParseService and is not read by any method
// in this file. NOTE(review): presumably reserved for asynchronous
// parsing — confirm or remove.
queue *ProcessingQueue
}
// NewParseService returns a ParseService backed by a fresh registry of the
// built-in parsers. The queue field is left nil.
func NewParseService() *ParseService {
	svc := new(ParseService)
	svc.registry = NewRegistry()
	return svc
}
// ParseFile parses the file at path synchronously through the registry and
// returns the resulting Markdown. On failure it returns an empty string
// and the parser's error wrapped with the path. Exposed for Wails binding.
func (s *ParseService) ParseFile(path string) (string, error) {
	md, parseErr := s.registry.Parse(path)
	if parseErr == nil {
		return md, nil
	}
	return "", fmt.Errorf("parse %s: %w", path, parseErr)
}
// GetSupportedTypes returns the supported file extensions as a fixed list.
// The list is hard-coded here rather than derived from the registry, so it
// has to be kept in sync by hand when parsers are added or removed.
func (s *ParseService) GetSupportedTypes() []string {
	exts := make([]string, 0, 6)
	exts = append(exts, ".xlsx", ".xls", ".pdf", ".dwg", ".dxf", ".docx")
	return exts
}
// ParseDeliveryStandard shows an open-file dialog (filtered to PDF, Excel
// and Word documents), parses the selected file with ParseFile and returns
// the Markdown. It returns ("", nil) when the dialog yields an empty path
// (the user cancelled), and a wrapped error when the dialog itself fails.
func (s *ParseService) ParseDeliveryStandard() (string, error) {
	picker := application.Get().Dialog.OpenFile()
	picker.SetTitle("选择交付标准文件 (Delivery Standard)")
	picker.AddFilter("Documents", "*.pdf;*.xlsx;*.xls;*.docx")

	chosen, err := picker.PromptForSingleSelection()
	switch {
	case err != nil:
		return "", fmt.Errorf("open file dialog: %w", err)
	case chosen == "":
		// User cancelled: not an error, just nothing to parse.
		return "", nil
	}
	return s.ParseFile(chosen)
}
+83
View File
@@ -0,0 +1,83 @@
package parser
import (
"archive/zip"
"encoding/xml"
"fmt"
"io"
)
// WordParser is the Parser for .docx files. It reads word/document.xml
// from the docx zip container and extracts the paragraph text.
type WordParser struct{}
// SupportedExtensions reports the single file extension (with leading dot)
// that WordParser claims: ".docx".
func (p *WordParser) SupportedExtensions() []string {
	var exts []string
	exts = append(exts, ".docx")
	return exts
}
// wDocument is the decoding target for word/document.xml. Only the body
// element is captured; everything else in the document is ignored.
type wDocument struct {
Body wBody `xml:"body"`
}
// wBody captures the paragraph ("p") elements that are direct children of
// the body element. Paragraphs nested inside other body children are not
// matched by this mapping.
type wBody struct {
P []wP `xml:"p"`
}
// wP is one paragraph: the run ("r") elements that are its direct children.
type wP struct {
R []wR `xml:"r"`
}
// wR is one run: the character data of each text ("t") element directly
// inside it. Other run children are ignored.
type wR struct {
T []string `xml:"t"`
}
// ParseToMarkdown extracts the text of the .docx file at path and returns
// it as Markdown-like text: the text runs of each paragraph are joined,
// and every non-empty paragraph is followed by a blank line. Paragraphs
// without text are omitted.
//
// It returns an error when path cannot be opened as a zip archive, when
// the archive has no word/document.xml entry, or when that entry cannot be
// opened, read or decoded as XML. Underlying errors are wrapped with the
// failing step so callers can tell the stages apart.
func (p *WordParser) ParseToMarkdown(path string) (string, error) {
	archive, err := zip.OpenReader(path)
	if err != nil {
		return "", fmt.Errorf("failed to open docx as zip: %w", err)
	}
	defer archive.Close()

	// The main document part always lives at this fixed entry name.
	var docFile *zip.File
	for _, f := range archive.File {
		if f.Name == "word/document.xml" {
			docFile = f
			break
		}
	}
	if docFile == nil {
		return "", fmt.Errorf("invalid docx file: word/document.xml not found")
	}

	rc, err := docFile.Open()
	if err != nil {
		return "", fmt.Errorf("open word/document.xml: %w", err)
	}
	defer rc.Close()

	data, err := io.ReadAll(rc)
	if err != nil {
		return "", fmt.Errorf("read word/document.xml: %w", err)
	}

	var doc wDocument
	if err := xml.Unmarshal(data, &doc); err != nil {
		return "", fmt.Errorf("parse word/document.xml: %w", err)
	}

	// Accumulate into one byte buffer: repeated string concatenation would
	// copy the whole output again for every run of text.
	var md []byte
	for _, paragraph := range doc.Body.P {
		start := len(md)
		for _, run := range paragraph.R {
			for _, text := range run.T {
				md = append(md, text...)
			}
		}
		// Only paragraphs that contributed text get a separator.
		if len(md) > start {
			md = append(md, "\n\n"...)
		}
	}
	return string(md), nil
}