sundynix-agentix/sundynix-dispatcher/internal/eino/report.go

package eino

import (
	"context"
	"encoding/json"
	"fmt"
	"log"
	"strings"
	"sync"
	"time"

	"github.com/sundynix/sundynix-dispatcher/internal/dsl"
	"github.com/sundynix/sundynix-dispatcher/internal/llm"
	"github.com/sundynix/sundynix-shared/contract"
)

// 报告生成的并发与超时参数。
const (
	reportFanout     = 4                // 章节并行撰写的最大并发
	reportRenderWait = 12 * time.Second // 渲染 docx 的等待上限
)

// reportOutline 是规划阶段产出的大纲。
type reportOutline struct {
	Title    string   `json:"title"`
	Sections []string `json:"sections"`
}

// reportSection 是一章成稿（标题 + 正文）。
type reportSection struct {
	Heading string `json:"heading"`
	Body    string `json:"body"`
}

// handleReport 执行报告生成的专用多步编排：
//
//	规划大纲 → 各章节并行(RAG 检索 + LLM 撰写) → 汇聚 → 渲染 Word(.docx) → 回流进度与正文
//
// 全程把人可读的 Markdown 进度与正文经 sundynix.streams.<id> 流回客户端；
// 最终调 mcp-go 的 report_render 落盘 docx，客户端凭 task_id 下载。
func (o *Orchestrator) handleReport(ctx context.Context, t *contract.Task, tr *execTracer) error {
	defer func() { _ = o.sink.CompleteStream(t.ID) }()

	topic, _ := t.Meta[contract.MetaTopic].(string)
	kb, _ := t.Meta[contract.MetaKB].(string)
	if topic == "" {
		topic = dsl.Compile(t.Graph).Query // 兜底：从 DSL 取用户输入
	}
	if topic == "" {
		topic = "未命名报告"
	}
	log.Printf("[report] task %s 生成报告: topic=%q kb=%q", t.ID, topic, kb)
	tr.info("task", "system", "报告任务受理", fmt.Sprintf("主题：%s%s", topic, kbSuffix(kb)))

	o.emit(t.ID, "> 正在规划大纲…\n\n")
	endPlan := tr.span("plan", "plan", "规划大纲")
	outline := o.planOutline(ctx, topic)
	endPlan(fmt.Sprintf("%d 章：%s", len(outline.Sections), strings.Join(outline.Sections, " / ")), nil)

	o.emit(t.ID, fmt.Sprintf("**报告大纲**（%d 章）\n", len(outline.Sections)))
	for i, s := range outline.Sections {
		o.emit(t.ID, fmt.Sprintf("%d. %s\n", i+1, s))
	}
	if kb != "" {
		o.emit(t.ID, fmt.Sprintf("\n> 正在并行检索知识库 %q 资料并撰写各章…\n\n", kb))
	} else {
		o.emit(t.ID, "\n> 正在并行撰写各章…\n\n")
	}

	sections := o.writeSections(ctx, topic, kb, outline.Sections, tr)

	// 把完整报告正文流式呈现给客户端。
	o.emit(t.ID, "\n---\n\n# "+firstNonEmpty(outline.Title, topic)+"\n\n")
	for _, s := range sections {
		o.emit(t.ID, "## "+s.Heading+"\n\n"+s.Body+"\n\n")
	}

	// 只持久化报告源数据（标题+章节），不在生成阶段渲染；导出时再按需出 Word/PDF/Markdown。
	endStore := tr.span("store", "render", "保存报告源")
	if o.storeReport(ctx, t.ID, firstNonEmpty(outline.Title, topic), sections) {
		endStore("已保存，可按需导出 Word/PDF/Markdown", nil)
		o.emit(t.ID, "---\n✅ 报告正文已生成，可在上方导出 **Word / PDF / Markdown**。\n")
		log.Printf("[report] task %s 完成，源已存", t.ID)
	} else {
		endStore("源保存失败", fmt.Errorf("store unavailable"))
		o.emit(t.ID, "---\n⚠️ 报告源保存失败（导出可能不可用），以上为报告正文。\n")
	}
	o.breaker.Report(true)
	return nil
}

func kbSuffix(kb string) string {
	if kb == "" {
		return "（不挂知识库）"
	}
	return "，知识库 " + kb
}

// planOutline 让模型规划 3–5 章大纲；模型不可用/解析失败则用通用兜底大纲。
func (o *Orchestrator) planOutline(ctx context.Context, topic string) reportOutline {
	fallback := reportOutline{Title: topic, Sections: []string{"背景与现状", "核心分析", "结论与建议"}}
	if !o.pool.Ready() {
		return fallback
	}
	sys := "你是资深报告撰稿人，擅长搭建清晰的报告结构。"
	user := fmt.Sprintf("请为主题《%s》规划一份报告大纲。"+
		"只输出 JSON：{\"title\":\"报告标题\",\"sections\":[\"章节标题\", ...]}，3 到 5 章，不要任何多余文字。", topic)
	txt, err := o.pool.Chat(ctx, []llm.ChatMessage{{Role: "system", Content: sys}, {Role: "user", Content: user}})
	if err != nil {
		log.Printf("[report] 规划大纲失败，用兜底大纲: %v", err)
		return fallback
	}
	var out reportOutline
	if json.Unmarshal([]byte(stripFence(txt)), &out) != nil || len(out.Sections) == 0 {
		log.Printf("[report] 大纲 JSON 解析失败，用兜底大纲。原文: %s", truncate(txt, 200))
		return fallback
	}
	if out.Title == "" {
		out.Title = topic
	}
	return out
}

// planItems 为 map 节点把主题拆成一组并行子项（splitBy 为拆分依据提示）。
// 模型不可用/解析失败则用通用兜底分项。
func (o *Orchestrator) planItems(ctx context.Context, topic, splitBy string) []string {
	fallback := []string{"背景与现状", "核心分析", "结论与建议"}
	if !o.pool.Ready() {
		return fallback
	}
	hint := splitBy
	if hint == "" {
		hint = "合理的章节"
	}
	user := fmt.Sprintf("请把主题《%s》拆分为一组「%s」，用于并行撰写。"+
		"只输出 JSON 数组：[\"子项1\",\"子项2\", ...]，3 到 6 项，不要任何多余文字。", topic, hint)
	txt, err := o.pool.Chat(ctx, []llm.ChatMessage{
		{Role: "system", Content: "你擅长把一个任务拆解为可并行处理的若干子项。"},
		{Role: "user", Content: user},
	})
	if err != nil {
		log.Printf("[map] 拆分子项失败，用兜底: %v", err)
		return fallback
	}
	var items []string
	if json.Unmarshal([]byte(stripFence(txt)), &items) != nil || len(items) == 0 {
		log.Printf("[map] 子项 JSON 解析失败，用兜底。原文: %s", truncate(txt, 200))
		return fallback
	}
	return items
}

// writeSections 各章节并行撰写（有界并发），结果按原顺序返回。
func (o *Orchestrator) writeSections(ctx context.Context, topic, kb string, headings []string, tr *execTracer) []reportSection {
	out := make([]reportSection, len(headings))
	sem := make(chan struct{}, reportFanout)
	var wg sync.WaitGroup
	for i, h := range headings {
		wg.Add(1)
		go func(i int, h string) {
			defer wg.Done()
			sem <- struct{}{}
			defer func() { <-sem }()
			node := fmt.Sprintf("section:%d", i)
			end := tr.span(node, "section", fmt.Sprintf("第%d章 %s", i+1, h))
			body := o.writeSection(ctx, topic, kb, h, tr, node)
			end(fmt.Sprintf("成稿 %d 字", len([]rune(body))), nil)
			out[i] = reportSection{Heading: h, Body: body}
		}(i, h)
	}
	wg.Wait()
	return out
}

// writeSection 撰写一章：先 RAG 检索参考资料（若挂了知识库），再让模型成稿。
func (o *Orchestrator) writeSection(ctx context.Context, topic, kb, heading string, tr *execTracer, node string) string {
	refs := o.retrieve(ctx, kb, topic+" "+heading)
	if refs != "" {
		tr.info(node, "section", "检索参考资料", truncate(strings.ReplaceAll(refs, "\n", " "), 120))
	}
	if !o.pool.Ready() {
		if refs != "" {
			return "（模型未配置，以下为检索到的参考资料）\n" + refs
		}
		return "（模型未配置，无法撰写本章。）"
	}
	sys := "你是专业报告撰稿人，语言严谨、条理清晰，使用中文书面语。"
	var ub strings.Builder
	fmt.Fprintf(&ub, "报告主题：%s\n本章标题：%s\n", topic, heading)
	if refs != "" {
		ub.WriteString("可参考的资料（来自知识库检索，请甄别采用，不要照搬）：\n")
		ub.WriteString(refs)
		ub.WriteString("\n")
	}
	ub.WriteString("请就「本章标题」撰写 200–400 字正文。只输出正文，不要重复标题、不要再列提纲。")
	txt, err := o.pool.Chat(ctx, []llm.ChatMessage{{Role: "system", Content: sys}, {Role: "user", Content: ub.String()}})
	if err != nil {
		log.Printf("[report] 撰写「%s」失败: %v", heading, err)
		return "（本章撰写失败：" + err.Error() + "）"
	}
	return strings.TrimSpace(txt)
}

// retrieve 经 mcp-go kb_search 工具检索知识库，整理为可读参考资料。kb 为空或无召回则返回空。
func (o *Orchestrator) retrieve(ctx context.Context, kb, query string) string {
	if o.tools == nil || kb == "" {
		return ""
	}
	cctx, cancel := context.WithTimeout(ctx, toolCallTimeout)
	defer cancel()
	res, err := o.tools.CallTool(cctx, contract.ToolSubjectGo("kb_search"), &contract.ToolCall{
		Tool: "kb_search", Args: map[string]any{"kb": kb, "q": query, "topK": 4},
	})
	if err != nil || res == nil || !res.OK || res.Content == "" || res.Content == "[]" {
		return ""
	}
	var hits []struct {
		Text  string  `json:"text"`
		Score float64 `json:"score"`
	}
	if json.Unmarshal([]byte(res.Content), &hits) != nil {
		return res.Content
	}
	var b strings.Builder
	for i, h := range hits {
		fmt.Fprintf(&b, "%d. %s\n", i+1, strings.TrimSpace(h.Text))
	}
	return b.String()
}

// storeReport 经 mcp-go report_store 把报告源数据（title+sections）落盘，供导出时按需渲染。
func (o *Orchestrator) storeReport(ctx context.Context, taskID, title string, secs []reportSection) bool {
	if o.tools == nil {
		return false
	}
	arr := make([]map[string]any, len(secs))
	for i, s := range secs {
		arr[i] = map[string]any{"heading": s.Heading, "body": s.Body}
	}
	cctx, cancel := context.WithTimeout(ctx, reportRenderWait)
	defer cancel()
	res, err := o.tools.CallTool(cctx, contract.ToolSubjectGo("report_store"), &contract.ToolCall{
		Tool: "report_store", TaskID: taskID,
		Args: map[string]any{"title": title, "task_id": taskID, "sections": arr},
	})
	return err == nil && res != nil && res.OK
}

// renderReport 经 mcp-go report_render 工具渲染 docx 并落盘，返回文件路径（失败返回空）。
func (o *Orchestrator) renderReport(ctx context.Context, taskID, title string, secs []reportSection) string {
	if o.tools == nil {
		return ""
	}
	arr := make([]map[string]any, len(secs))
	for i, s := range secs {
		arr[i] = map[string]any{"heading": s.Heading, "body": s.Body}
	}
	cctx, cancel := context.WithTimeout(ctx, reportRenderWait)
	defer cancel()
	res, err := o.tools.CallTool(cctx, contract.ToolSubjectGo("report_render"), &contract.ToolCall{
		Tool: "report_render", TaskID: taskID,
		Args: map[string]any{"title": title, "task_id": taskID, "sections": arr},
	})
	if err != nil || res == nil || !res.OK {
		log.Printf("[report] report_render 失败: %v", err)
		return ""
	}
	return res.Content
}

// emit 把一段文本作为 Token 流回客户端（报告进度与正文都走这里）。
func (o *Orchestrator) emit(taskID, s string) {
	if err := o.sink.PublishToken(taskID, []byte(s)); err != nil {
		log.Printf("[report] emit token failed: %v", err)
	}
}

// ---- 小工具 ----

func firstNonEmpty(a, b string) string {
	if strings.TrimSpace(a) != "" {
		return a
	}
	return b
}

// stripFence 去掉模型可能包裹的 ```json … ``` 代码围栏。
func stripFence(s string) string {
	s = strings.TrimSpace(s)
	if strings.HasPrefix(s, "```") {
		if i := strings.IndexByte(s, '\n'); i >= 0 {
			s = s[i+1:]
		}
		s = strings.TrimSuffix(strings.TrimSpace(s), "```")
	}
	return strings.TrimSpace(s)
}

func truncate(s string, n int) string {
	r := []rune(s)
	if len(r) <= n {
		return s
	}
	return string(r[:n]) + "…"
}