feat(report): 报告生成端到端 — 规划→分章并行检索撰写→渲染真实 Word

- shared: 新增 intent=report 任务约定 + ReportPath（跨进程共享落盘目录，零配置对齐）
- dispatcher: handleReport 专用编排（DeepSeek 规划大纲 → 各章并行 RAG 检索+撰写 → 汇聚 → report_render），Pool.Chat 非流式聚合；进度与正文经 Token 流实时回流
- mcp-go: 用标准库 archive/zip + OOXML 拼出真实可打开的 .docx（零额外依赖），report_render 工具落盘到共享目录；附 docx 有效性测试
- gateway: POST /reports 触发；GET /reports/:id/download 下发 Word
- desktop: 新增「报告」页（主题→实时编排进度→下载 Word），左导航置为就绪

实测：DeepSeek 生成 5 章报告 → 渲染 5KB docx → file 识别为 Microsoft Word 2007+ → textutil 提取标题/各章正文完整。

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-12 14:02:21 +08:00
parent 8469cfc0db
commit ba8c6b3c43
15 changed files with 744 additions and 10 deletions
@@ -1,15 +1,169 @@
-// Package office 基于 UniOffice 提供 Word/文档渲染能力。
+// Package office 生成真实可用的 Word（.docx）文档。
+//
+// 这里不引第三方 Office 库（UniOffice 为商业授权、且会显著增重依赖），而是直接
+// 按 OOXML(WordprocessingML) 规范用标准库 archive/zip + 内联 XML 拼出最小但完整、
+// Word / Pages / WPS 均可正常打开的 .docx 包。零额外依赖，契合 clone 即跑的目标。
 package office

-import "context"
+import (
+	"archive/zip"
+	"bytes"
+	"context"
+	"fmt"
+	"strings"
+)

-// Renderer 把结构化数据渲染为 docx/xlsx 等文档。
+// Doc accumulates paragraphs and is finally serialized into a .docx byte stream.
+type Doc struct {
+	body strings.Builder // paragraph markup placed inside <w:body> of word/document.xml
+}
+
+// NewDoc returns an empty document with no paragraphs.
+func NewDoc() *Doc {
+	return new(Doc)
+}
+
+// Title appends the document title paragraph: centered, bold, 36 half-points
+// (18pt), followed by 240 twentieths of a point of spacing. It returns d so
+// calls can be chained.
+func (d *Doc) Title(text string) *Doc {
+	opts := paraOpts{
+		bold:       true,
+		center:     true,
+		sizeHalfPt: 36,
+		spaceAfter: 240,
+	}
+	d.para(text, opts)
+	return d
+}
+
+// Heading appends a section heading paragraph: bold, 28 half-points (14pt),
+// with 240 twentieths of a point of spacing before and 120 after. It returns d
+// so calls can be chained.
+func (d *Doc) Heading(text string) *Doc {
+	opts := paraOpts{bold: true, sizeHalfPt: 28}
+	opts.spaceBefore, opts.spaceAfter = 240, 120
+	d.para(text, opts)
+	return d
+}
+
+// Para appends one body paragraph at 22 half-points (11pt). Leading and
+// trailing whitespace is trimmed first; text that is empty after trimming
+// adds nothing. It returns d so calls can be chained.
+func (d *Doc) Para(text string) *Doc {
+	if trimmed := strings.TrimSpace(text); trimmed != "" {
+		d.para(trimmed, paraOpts{sizeHalfPt: 22, spaceAfter: 120})
+	}
+	return d
+}
+
+// Body splits text into lines and appends each line as its own paragraph via
+// Para. CRLF sequences are normalized to LF before splitting. It returns d so
+// calls can be chained.
+func (d *Doc) Body(text string) *Doc {
+	normalized := strings.ReplaceAll(text, "\r\n", "\n")
+	lines := strings.Split(normalized, "\n")
+	for i := range lines {
+		d.Para(lines[i])
+	}
+	return d
+}
+
+// paraOpts carries the formatting applied to a single paragraph by Doc.para:
+// bold and center toggle <w:b/> and centered justification, the remaining
+// fields are numeric sizes that are only emitted when greater than zero.
+type paraOpts struct {
+	bold        bool
+	center      bool
+	sizeHalfPt  int // OOXML font size is expressed in half-points; 22 = 11pt
+	spaceBefore int // spacing before the paragraph (twentieths of a point)
+	spaceAfter  int // spacing after the paragraph, written to w:after
+}
+
+// para appends one <w:p> element holding a single run with the given text.
+// Paragraph properties (spacing, centering) come first, then run properties
+// (bold, font size), then the text escaped through escapeXML.
+func (d *Doc) para(text string, o paraOpts) {
+	out := &d.body
+	hasSpacing := o.spaceBefore > 0 || o.spaceAfter > 0
+
+	out.WriteString("<w:p>")
+
+	// Paragraph properties: spacing, then justification.
+	out.WriteString("<w:pPr>")
+	if hasSpacing {
+		fmt.Fprintf(out, `<w:spacing w:before="%d" w:after="%d"/>`, o.spaceBefore, o.spaceAfter)
+	}
+	if o.center {
+		out.WriteString(`<w:jc w:val="center"/>`)
+	}
+	out.WriteString("</w:pPr>")
+
+	// The single text run: bold flag, then the size for both regular and
+	// complex-script fonts.
+	out.WriteString("<w:r><w:rPr>")
+	if o.bold {
+		out.WriteString("<w:b/>")
+	}
+	if size := o.sizeHalfPt; size > 0 {
+		fmt.Fprintf(out, `<w:sz w:val="%d"/><w:szCs w:val="%d"/>`, size, size)
+	}
+	out.WriteString("</w:rPr>")
+
+	// xml:space="preserve" keeps significant whitespace inside the text.
+	fmt.Fprintf(out, `<w:t xml:space="preserve">%s</w:t>`, escapeXML(text))
+	out.WriteString("</w:r></w:p>")
+}
+
+// Bytes packages the accumulated paragraphs into a .docx archive: a zip
+// holding the three core parts [Content_Types].xml, _rels/.rels and
+// word/document.xml. It returns the archive bytes, or a wrapped error if
+// creating, writing or finalizing a zip entry fails.
+func (d *Doc) Bytes() ([]byte, error) {
+	// An ordered slice, not a map: Go randomizes map iteration, which made
+	// the entry order (and therefore the output bytes) differ between calls.
+	// Writing [Content_Types].xml first also matches what format sniffers
+	// that inspect the first zip entry expect from an OOXML package.
+	parts := []struct{ name, content string }{
+		{"[Content_Types].xml", contentTypesXML},
+		{"_rels/.rels", relsXML},
+		{"word/document.xml", documentXML(d.body.String())},
+	}
+
+	var buf bytes.Buffer
+	zw := zip.NewWriter(&buf)
+	for _, p := range parts {
+		w, err := zw.Create(p.name)
+		if err != nil {
+			return nil, fmt.Errorf("creating docx part %q: %w", p.name, err)
+		}
+		if _, err := w.Write([]byte(p.content)); err != nil {
+			return nil, fmt.Errorf("writing docx part %q: %w", p.name, err)
+		}
+	}
+	// Close writes the central directory; the archive is invalid without it.
+	if err := zw.Close(); err != nil {
+		return nil, fmt.Errorf("finalizing docx archive: %w", err)
+	}
+	return buf.Bytes(), nil
+}
+
+// ---- Renderer：把结构化报告（标题 + 章节）渲染为 docx ----
+
+// Section is one chapter of a report: a section heading plus its body text.
+type Section struct {
+	Heading string `json:"heading"`
+	Body    string `json:"body"`
+}
+
+// Renderer renders structured data into a docx document.
 type Renderer struct{}

+// NewRenderer returns a new Renderer.
 func NewRenderer() *Renderer { return &Renderer{} }

-// RenderDocx 生成 Word 文档并返回字节流。
-func (r *Renderer) RenderDocx(ctx context.Context, payload map[string]any) ([]byte, error) {
-	// TODO: 使用 unioffice/document 构建并序列化
-	return nil, nil
+// RenderReport renders a report made of a main title followed by sections and
+// returns it as .docx bytes. An empty title or an empty section heading is
+// skipped; each section body is split into paragraphs by Doc.Body. The context
+// argument is accepted but not used.
+func (r *Renderer) RenderReport(_ context.Context, title string, sections []Section) ([]byte, error) {
+	out := NewDoc()
+	if len(title) > 0 {
+		out.Title(title)
+	}
+	for i := range sections {
+		sec := &sections[i]
+		if len(sec.Heading) > 0 {
+			out.Heading(sec.Heading)
+		}
+		out.Body(sec.Body)
+	}
+	return out.Bytes()
+}
+
+// ---- OOXML 模板 ----
+
+// contentTypesXML is the [Content_Types].xml part: it declares default content
+// types for .rels and .xml entries and overrides /word/document.xml as the
+// WordprocessingML main document.
+const contentTypesXML = `<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
+<Types xmlns="http://schemas.openxmlformats.org/package/2006/content-types">
+<Default Extension="rels" ContentType="application/vnd.openxmlformats-package.relationships+xml"/>
+<Default Extension="xml" ContentType="application/xml"/>
+<Override PartName="/word/document.xml" ContentType="application/vnd.openxmlformats-officedocument.wordprocessingml.document.main+xml"/>
+</Types>`
+
+// relsXML is the package-level _rels/.rels part: a single relationship (rId1)
+// pointing at word/document.xml as the office document.
+const relsXML = `<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
+<Relationships xmlns="http://schemas.openxmlformats.org/package/2006/relationships">
+<Relationship Id="rId1" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument" Target="word/document.xml"/>
+</Relationships>`
+
+func documentXML(body string) string {
+	return `<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
+<w:document xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main"><w:body>` +
+		body +
+		`<w:sectPr><w:pgSz w:w="11906" w:h="16838"/><w:pgMar w:top="1440" w:right="1440" w:bottom="1440" w:left="1440"/></w:sectPr></w:body></w:document>`
+}
+
+func escapeXML(s string) string {
+	r := strings.NewReplacer(
+		"&", "&amp;",
+		"<", "&lt;",
+		">", "&gt;",
+		`"`, "&quot;",
+		"'", "&apos;",
+	)
+	return r.Replace(s)
 }