feat(dispatcher): 输出护栏 —— 发射层脱敏疑似密钥/令牌

补齐 Harness 输出侧：harness.RedactSecrets 识别并脱敏 sk-/AKIA/JWT/Bearer 等疑似密钥或令牌（纯逻辑 + 单测）。runAgent 在每个 token 分片发射前调用它（流式输出一旦发出便无法回收，故只能逐片脱敏）；累加进 b.answer 的是脱敏后的文本，因此写回历史的也是脱敏版；若有命中，则在“运行·观测”中记录一条“已脱敏 N 处”轨迹。注：被切分到相邻分片中的密钥可能漏检（流式输出的固有限制），逐片脱敏仅为尽力而为；生产环境可增加滑动窗口缓冲来增强。

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-18 11:52:53 +08:00
parent 718140239d
commit b7360439ab
4 changed files with 77 additions and 5 deletions
@@ -10,6 +10,7 @@ import (
 	"github.com/cloudwego/eino/schema"

 	"github.com/sundynix/sundynix-dispatcher/internal/dsl"
+	"github.com/sundynix/sundynix-dispatcher/internal/harness"
 	"github.com/sundynix/sundynix-shared/contract"
 )

@@ -233,13 +234,16 @@ func (o *Orchestrator) runAgent(ctx context.Context, taskID string, b *board, sy
 	msgs, _ := buildMessages(ctx, rc)
 	tr.emit(node, "model", "start", "模型流式推理", "", 0)
 	t0 := time.Now()
-	n := 0
+	n, redacted := 0, 0
 	send := func(s string) {
 		if s == "" {
 			return
 		}
-		_ = o.sink.PublishToken(taskID, []byte(s))
-		b.answer += s
+		// 输出护栏：发射前逐片脱敏疑似密钥/令牌（流式无法回收已发，故逐片处理）。
+		safe, hit := harness.RedactSecrets(s)
+		redacted += hit
+		_ = o.sink.PublishToken(taskID, []byte(safe))
+		b.answer += safe
 		n++
 	}
 	var err error
@@ -252,6 +256,9 @@ func (o *Orchestrator) runAgent(ctx context.Context, taskID string, b *board, sy
 		tr.emit(node, "model", "error", "模型流式推理", err.Error(), time.Since(t0).Milliseconds())
 		return
 	}
+	if redacted > 0 {
+		tr.info(node, "system", "输出护栏", fmt.Sprintf("已脱敏 %d 处疑似密钥/令牌", redacted))
+	}
 	tr.emit(node, "model", "end", "模型流式推理",
 		fmt.Sprintf("%d tokens / %d 字", n, len([]rune(b.answer))), time.Since(t0).Milliseconds())
 }
@@ -0,0 +1,26 @@
+package harness
+
+import "regexp"
+
+// secretPatterns lists the shapes of secrets/tokens that must not be echoed to the user; every match is replaced by redactMark.
+var secretPatterns = []*regexp.Regexp{
+	regexp.MustCompile(`(?:\b|_)sk-[A-Za-z0-9_-]{16,}`),                              // OpenAI/DeepSeek-style key; anchored at a word boundary or "_" so words like "risk-management-framework" stay intact
+	regexp.MustCompile(`AKIA[0-9A-Z]{16}`),                                           // AWS access key ID
+	regexp.MustCompile(`eyJ[A-Za-z0-9_-]{8,}\.[A-Za-z0-9_-]{8,}\.[A-Za-z0-9_-]{4,}`), // JWT
+	regexp.MustCompile(`(?i)bearer\s+[A-Za-z0-9._-]{16,}`),                           // Bearer token (the "Bearer" keyword is redacted together with the value)
+}
+
+const redactMark = "[已脱敏]"
+
+// RedactSecrets replaces every secretPatterns match in s with redactMark and returns the redacted text and the number of
+// replacements. It only inspects the text it is given, so a secret split across separately redacted chunks can slip through.
+func RedactSecrets(s string) (string, int) {
+	n := 0
+	for _, re := range secretPatterns {
+		s = re.ReplaceAllStringFunc(s, func(string) string {
+			n++ // one hit per replaced match, summed over all patterns
+			return redactMark
+		})
+	}
+	return s, n
+}
@@ -0,0 +1,39 @@
+package harness
+
+import (
+	"strings"
+	"testing"
+)
+
+// TestRedactSecrets checks that each supported secret shape is counted once and replaced by redactMark.
+func TestRedactSecrets(t *testing.T) {
+	for _, tc := range []struct {
+		text   string
+		hits   int
+		secret string // substring that must no longer appear after redaction
+	}{
+		{"我的 key 是 sk-912cf85b16d04b22bcb95f4576423bfb 别外传", 1, "sk-912cf85b16d04b22bcb95f4576423bfb"},
+		{"token: AKIAIOSFODNN7EXAMPLE", 1, "AKIAIOSFODNN7EXAMPLE"},
+		{"Authorization: Bearer abcdef0123456789ABCDEF", 1, "abcdef0123456789ABCDEF"},
+		{"jwt eyJhbGciOiJIUzI1NiJ9.eyJzdWIiOiIxIn0.abcd1234 完事", 1, "eyJhbGciOiJIUzI1NiJ9"},
+	} {
+		got, hits := RedactSecrets(tc.text)
+		if hits != tc.hits {
+			t.Errorf("RedactSecrets(%q) 命中数=%d want %d", tc.text, hits, tc.hits)
+		}
+		if strings.Contains(got, tc.secret) {
+			t.Errorf("脱敏后仍含密钥: %q", got)
+		}
+		if !strings.Contains(got, redactMark) {
+			t.Errorf("应含脱敏标记: %q", got)
+		}
+	}
+}
+
+// TestRedactSecrets_Clean checks that text without secret-like tokens comes back unchanged with zero hits.
+func TestRedactSecrets_Clean(t *testing.T) {
+	const plain = "这是一段正常的回答，介绍杭州西湖的历史与十景。"
+	if got, hits := RedactSecrets(plain); hits != 0 || got != plain {
+		t.Errorf("正常文本不应被改动: n=%d out=%q", hits, got)
+	}
+}