From b7360439ab4dca814b31a85ccab8ef174e08eb58 Mon Sep 17 00:00:00 2001 From: Blizzard Date: Thu, 18 Jun 2026 11:52:53 +0800 Subject: [PATCH] =?UTF-8?q?feat(dispatcher):=20=E8=BE=93=E5=87=BA=E6=8A=A4?= =?UTF-8?q?=E6=A0=8F=20=E2=80=94=E2=80=94=20=E5=8F=91=E5=B0=84=E5=B1=82?= =?UTF-8?q?=E8=84=B1=E6=95=8F=E7=96=91=E4=BC=BC=E5=AF=86=E9=92=A5/?= =?UTF-8?q?=E4=BB=A4=E7=89=8C?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 补齐 Harness 输出侧:harness.RedactSecrets 识别并脱敏 sk-/AKIA/JWT/Bearer 等 疑似密钥/令牌(纯逻辑 + 单测)。runAgent 在每个 token 分片发射前调用(流式无法回收 已发,故逐片脱敏),脱敏后的文本累计进 b.answer(写回历史的也是脱敏版);有命中则在 运行·观测打一条'已脱敏 N 处'轨迹。 注:跨分片的密钥可能漏检(流式输出的固有局限),逐片为最佳努力;生产可加滑窗缓冲增强。 Co-Authored-By: Claude Opus 4.8 --- PROGRESS.md | 4 +- sundynix-dispatcher/internal/eino/graph.go | 13 +++++-- .../internal/harness/output.go | 26 +++++++++++++ .../internal/harness/output_test.go | 39 +++++++++++++++++++ 4 files changed, 77 insertions(+), 5 deletions(-) create mode 100644 sundynix-dispatcher/internal/harness/output.go create mode 100644 sundynix-dispatcher/internal/harness/output_test.go diff --git a/PROGRESS.md b/PROGRESS.md index 124655d..70d2c16 100644 --- a/PROGRESS.md +++ b/PROGRESS.md @@ -32,7 +32,7 @@ - [x] 独立运维控制台 sundynix-admin(模型 / 数据源页) - [x] SSE 回流:Token 流 / 执行轨迹 / 入库进度 - [x] Harness **输入**护栏(拦提示词注入 + 超大体,纯逻辑 `internal/guardrail` + 单测 + 实跑验证) -- [ ] 🟡 Harness **输出**护栏(应在 dispatcher token 发射层做,网关侧会破坏 SSE 流式 —— 见路线图) +- [x] Harness **输出**护栏(dispatcher 发射层逐片脱敏疑似密钥/令牌 sk-/AKIA/JWT/Bearer + 轨迹标记 + 单测) - [ ] 🟡 商业化与计费模块(占位,仅统计任务数) ## 第 3 层 · MESSAGE BUS(NATS 零拷贝骨干网) @@ -90,7 +90,7 @@ - [x] **真实登录 / 鉴权(JWT)** —— 后端 + 前端闭环已完成 ✅ - [x] **代码解释器 + 安全沙箱**(AST 守卫 + Docker 隔离已落地 ✅;生产可换 gVisor/Kata) -- [ ] **Harness 余下**:输出护栏(dispatcher token 发射层)(熔断降级 ✅、输入护栏 ✅、LLM 自动化评测 ✅ 已完成) +- [x] **Harness 三件全完成** ✅:熔断降级 · 输入护栏 · LLM 评测 · 输出护栏(密钥脱敏) - [ ] **长期记忆抽取** + external_api 工具 - [ ] **计费 / 商业化**真实实现 - [ ] 微服务化拆分(Morph B)—— 现为 Monolith First,**按设计如此,非缺陷** diff --git 
a/sundynix-dispatcher/internal/eino/graph.go b/sundynix-dispatcher/internal/eino/graph.go index 290e59d..38f2a04 100644 --- a/sundynix-dispatcher/internal/eino/graph.go +++ b/sundynix-dispatcher/internal/eino/graph.go @@ -10,6 +10,7 @@ import ( "github.com/cloudwego/eino/schema" "github.com/sundynix/sundynix-dispatcher/internal/dsl" + "github.com/sundynix/sundynix-dispatcher/internal/harness" "github.com/sundynix/sundynix-shared/contract" ) @@ -233,13 +234,16 @@ func (o *Orchestrator) runAgent(ctx context.Context, taskID string, b *board, sy msgs, _ := buildMessages(ctx, rc) tr.emit(node, "model", "start", "模型流式推理", "", 0) t0 := time.Now() - n := 0 + n, redacted := 0, 0 send := func(s string) { if s == "" { return } - _ = o.sink.PublishToken(taskID, []byte(s)) - b.answer += s + // 输出护栏:发射前逐片脱敏疑似密钥/令牌(流式无法回收已发,故逐片处理)。 + safe, hit := harness.RedactSecrets(s) + redacted += hit + _ = o.sink.PublishToken(taskID, []byte(safe)) + b.answer += safe n++ } var err error @@ -252,6 +256,9 @@ func (o *Orchestrator) runAgent(ctx context.Context, taskID string, b *board, sy tr.emit(node, "model", "error", "模型流式推理", err.Error(), time.Since(t0).Milliseconds()) return } + if redacted > 0 { + tr.info(node, "system", "输出护栏", fmt.Sprintf("已脱敏 %d 处疑似密钥/令牌", redacted)) + } tr.emit(node, "model", "end", "模型流式推理", fmt.Sprintf("%d tokens / %d 字", n, len([]rune(b.answer))), time.Since(t0).Milliseconds()) } diff --git a/sundynix-dispatcher/internal/harness/output.go b/sundynix-dispatcher/internal/harness/output.go new file mode 100644 index 0000000..439af41 --- /dev/null +++ b/sundynix-dispatcher/internal/harness/output.go @@ -0,0 +1,26 @@ +package harness + +import "regexp" + +// secretPatterns 是输出里疑似密钥/令牌的特征(命中即脱敏,防模型把密钥回吐给用户)。 +var secretPatterns = []*regexp.Regexp{ + regexp.MustCompile(`sk-[A-Za-z0-9_-]{16,}`), // OpenAI/DeepSeek 风格 key + regexp.MustCompile(`AKIA[0-9A-Z]{16}`), // AWS Access Key ID + regexp.MustCompile(`eyJ[A-Za-z0-9_-]{8,}\.[A-Za-z0-9_-]{8,}\.[A-Za-z0-9_-]{4,}`), // JWT + 
regexp.MustCompile(`(?i)bearer\s+[A-Za-z0-9._-]{16,}`), // Bearer 令牌 +} + +const redactMark = "[已脱敏]" + +// RedactSecrets 把文本里疑似密钥/令牌替换为脱敏标记,返回脱敏后文本与命中次数。 +// 输出护栏:在 token 发射前对每个分片调用(流式无法回收已发,故逐片脱敏 + 最终标记)。 +func RedactSecrets(s string) (string, int) { + n := 0 + for _, re := range secretPatterns { + s = re.ReplaceAllStringFunc(s, func(string) string { + n++ + return redactMark + }) + } + return s, n +} diff --git a/sundynix-dispatcher/internal/harness/output_test.go b/sundynix-dispatcher/internal/harness/output_test.go new file mode 100644 index 0000000..981eb48 --- /dev/null +++ b/sundynix-dispatcher/internal/harness/output_test.go @@ -0,0 +1,39 @@ +package harness + +import ( + "strings" + "testing" +) + +func TestRedactSecrets(t *testing.T) { + cases := []struct { + in string + wantN int + mustGone string // 脱敏后不应再出现的子串 + }{ + {"我的 key 是 sk-912cf85b16d04b22bcb95f4576423bfb 别外传", 1, "sk-912cf85b16d04b22bcb95f4576423bfb"}, + {"token: AKIAIOSFODNN7EXAMPLE", 1, "AKIAIOSFODNN7EXAMPLE"}, + {"Authorization: Bearer abcdef0123456789ABCDEF", 1, "abcdef0123456789ABCDEF"}, + {"jwt eyJhbGciOiJIUzI1NiJ9.eyJzdWIiOiIxIn0.abcd1234 完事", 1, "eyJhbGciOiJIUzI1NiJ9"}, + } + for _, c := range cases { + out, n := RedactSecrets(c.in) + if n != c.wantN { + t.Errorf("RedactSecrets(%q) 命中数=%d want %d", c.in, n, c.wantN) + } + if strings.Contains(out, c.mustGone) { + t.Errorf("脱敏后仍含密钥: %q", out) + } + if !strings.Contains(out, redactMark) { + t.Errorf("应含脱敏标记: %q", out) + } + } +} + +func TestRedactSecrets_Clean(t *testing.T) { + in := "这是一段正常的回答,介绍杭州西湖的历史与十景。" + out, n := RedactSecrets(in) + if n != 0 || out != in { + t.Errorf("正常文本不应被改动: n=%d out=%q", n, out) + } +}