feat(observability): 执行可视化 — 节点级实时轨迹(运行·观测)

把任务执行做成可观测:Dispatcher 在每个节点/阶段发结构化 ExecEvent,
经独立 NATS 通道回流,前端逐节点点亮(状态/耗时/工具入参产出)。

- shared: contract.ExecEvent + ExecSubject(sundynix.exec.<id>,与 Token 流分流);
  bus.PublishExec/CompleteExec/SubscribeExec(core NATS,复用结束头)
- dispatcher: execTracer(自增 Seq 保序 + span 自动计耗时);
  Orchestrator 加 ExecSink;通用图(init 召回 / 各 tool 入参→产出 / prompt / model
  首token+token数)与报告编排(规划大纲 / 各章并行 start-end / 渲染)全程埋点
- gateway: SubscribeExec + GET /tasks/:id/exec SSE(与 token 流并行)
- desktop: streamExec + deriveNodes(按 node 归并 start/end/error/info);
  复用组件 ExecTrace(竖向轨道,按 kind 着色,运行中脉冲灯);
  新 RunsView(运行·观测:轨迹+输出双栏);BottomDrawer 轨迹/工具调用 tab 接真实数据;
  ReportView 加执行轨迹栏;左导航「运行」置就绪

实测:
- 报告任务 /exec:规划(2680ms,4章) → 4 章并行(seq 交错,各~7-8s 重叠=真并行,
  每章带 docs 知识库检索预览+成稿字数) → 渲染(docx 落盘)
- 通用图 /exec:tool:kb_search(678ms,入参→Milvus 产出) → prompt(2消息) →
  model(首token 860ms / 4 tokens)
- 浏览器(Preview):报告页执行轨迹逐节点点亮、章节带耗时/字数/检索片段,完成后下载 Word

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
Blizzard
2026-06-12 14:29:28 +08:00
parent ba8c6b3c43
commit cdc5b3a847
19 changed files with 587 additions and 63 deletions
@@ -5,6 +5,7 @@ import (
"context"
"encoding/json"
"errors"
"fmt"
"io"
"log"
"strings"
@@ -38,11 +39,13 @@ type Orchestrator struct {
breaker *harness.CircuitBreaker
sink TokenSink
tools ToolCaller
exec ExecSink
}
// NewOrchestrator 持有依赖;图按任务的 DSL 在 Handle 内动态编译。
func NewOrchestrator(pool *llm.Pool, breaker *harness.CircuitBreaker, sink TokenSink, tools ToolCaller) (*Orchestrator, error) {
return &Orchestrator{pool: pool, breaker: breaker, sink: sink, tools: tools}, nil
// NewOrchestrator stores the supplied dependencies (pool, breaker, sink,
// tools, exec) on a new Orchestrator and returns it; the returned error is
// always nil.
//
// exec is the outlet for execution-visualization events. Per the original
// note it may be nil, in which case no trace events are emitted.
// NOTE(review): the nil handling is not visible in this function — confirm
// where exec is consumed.
func NewOrchestrator(pool *llm.Pool, breaker *harness.CircuitBreaker, sink TokenSink, tools ToolCaller, exec ExecSink) (*Orchestrator, error) {
return &Orchestrator{pool: pool, breaker: breaker, sink: sink, tools: tools, exec: exec}, nil
}
// Handle 消费一个任务:按 DSL 编译 Eino 图并执行,把 Token 流回流到 sundynix.streams.<id>。
@@ -51,22 +54,30 @@ func (o *Orchestrator) Handle(ctx context.Context, t *contract.Task) error {
log.Printf("[eino] circuit open, drop task %s", t.ID)
return nil
}
tr := o.tracer(t.ID)
defer tr.done()
// 报告生成走专用多步编排(规划→分章并行检索撰写→汇聚→渲染 Word),而非通用对话图。
if intent, _ := t.Meta[contract.MetaIntent].(string); intent == contract.IntentReport {
return o.handleReport(ctx, t)
return o.handleReport(ctx, t, tr)
}
log.Printf("[eino] task %s received (graph=%d bytes), compiling DSL → Eino graph...", t.ID, len(t.Graph))
tr.info("task", "system", "任务受理", fmt.Sprintf("DSL %d 字节,编译 Eino 图", len(t.Graph)))
run, err := o.compileFlow(ctx, t)
endCompile := tr.span("compile", "system", "编译 Eino 图")
run, err := o.compileFlow(ctx, t, tr)
if err != nil {
endCompile("", err)
log.Printf("[eino] task %s compile error: %v", t.ID, err)
_ = o.sink.CompleteStream(t.ID)
o.breaker.Report(false)
return err
}
endCompile("图编译完成", nil)
stream, err := run.Stream(ctx, t)
if err != nil {
tr.emit("model", "model", "error", "模型推理", err.Error(), 0)
log.Printf("[eino] task %s graph error: %v", t.ID, err)
_ = o.sink.CompleteStream(t.ID)
o.breaker.Report(false)
@@ -76,6 +87,7 @@ func (o *Orchestrator) Handle(ctx context.Context, t *contract.Task) error {
n := 0
var answer strings.Builder
t0 := time.Now()
for {
chunk, rerr := stream.Recv()
if errors.Is(rerr, io.EOF) {
@@ -88,6 +100,9 @@ func (o *Orchestrator) Handle(ctx context.Context, t *contract.Task) error {
if chunk == nil || chunk.Content == "" {
continue
}
if n == 0 {
tr.emit("model", "model", "start", "模型流式推理", fmt.Sprintf("首 token %dms", time.Since(t0).Milliseconds()), 0)
}
if perr := o.sink.PublishToken(t.ID, []byte(chunk.Content)); perr != nil {
log.Printf("[eino] publish token failed: %v", perr)
break
@@ -95,6 +110,7 @@ func (o *Orchestrator) Handle(ctx context.Context, t *contract.Task) error {
answer.WriteString(chunk.Content)
n++
}
tr.emit("model", "model", "end", "模型流式推理", fmt.Sprintf("%d tokens / %d 字", n, len([]rune(answer.String()))), time.Since(t0).Milliseconds())
if cerr := o.sink.CompleteStream(t.ID); cerr != nil {
log.Printf("[eino] complete stream failed: %v", cerr)