feat(dispatcher): 输出护栏 —— 发射层脱敏疑似密钥/令牌

补齐 Harness 输出侧:harness.RedactSecrets 识别并脱敏 sk-/AKIA/JWT/Bearer 等
疑似密钥/令牌(纯逻辑 + 单测)。runAgent 在每个 token 分片发射前调用它(流式输出
已发出的内容无法收回,故逐片脱敏),脱敏后的文本累计进 b.answer(写回历史的也是
脱敏版);有命中则在运行·观测中打一条'已脱敏 N 处'轨迹。

注:密钥若恰好被切在两个分片之间则可能漏检(流式输出的固有限制),逐片脱敏只是尽力而为;生产环境可加滑动窗口缓冲来增强。

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
Blizzard
2026-06-18 11:52:53 +08:00
parent 718140239d
commit b7360439ab
4 changed files with 77 additions and 5 deletions
+10 -3
View File
@@ -10,6 +10,7 @@ import (
"github.com/cloudwego/eino/schema"
"github.com/sundynix/sundynix-dispatcher/internal/dsl"
"github.com/sundynix/sundynix-dispatcher/internal/harness"
"github.com/sundynix/sundynix-shared/contract"
)
@@ -233,13 +234,16 @@ func (o *Orchestrator) runAgent(ctx context.Context, taskID string, b *board, sy
msgs, _ := buildMessages(ctx, rc)
tr.emit(node, "model", "start", "模型流式推理", "", 0)
t0 := time.Now()
n := 0
n, redacted := 0, 0
send := func(s string) {
if s == "" {
return
}
_ = o.sink.PublishToken(taskID, []byte(s))
b.answer += s
// 输出护栏:发射前逐片脱敏疑似密钥/令牌(流式无法回收已发,故逐片处理)。
safe, hit := harness.RedactSecrets(s)
redacted += hit
_ = o.sink.PublishToken(taskID, []byte(safe))
b.answer += safe
n++
}
var err error
@@ -252,6 +256,9 @@ func (o *Orchestrator) runAgent(ctx context.Context, taskID string, b *board, sy
tr.emit(node, "model", "error", "模型流式推理", err.Error(), time.Since(t0).Milliseconds())
return
}
if redacted > 0 {
tr.info(node, "system", "输出护栏", fmt.Sprintf("已脱敏 %d 处疑似密钥/令牌", redacted))
}
tr.emit(node, "model", "end", "模型流式推理",
fmt.Sprintf("%d tokens / %d 字", n, len([]rune(b.answer))), time.Since(t0).Milliseconds())
}
@@ -0,0 +1,26 @@
package harness
import "regexp"
// secretPatterns lists the shapes of likely secrets/tokens in model output.
// Any match is redacted so the model cannot echo a credential back to the user.
var secretPatterns = []*regexp.Regexp{
	// OpenAI/DeepSeek-style key. The leading \b (ASCII word boundary) keeps
	// ordinary hyphenated words such as "disk-usage-monitoring-tool" from
	// matching on their inner "sk-"; a key preceded by whitespace, punctuation,
	// a non-ASCII character or the start of the text still matches.
	regexp.MustCompile(`\bsk-[A-Za-z0-9_-]{16,}`),
	// AWS access key ID.
	regexp.MustCompile(`AKIA[0-9A-Z]{16}`),
	// JWT: three dot-separated base64url segments, the first starting with "eyJ".
	regexp.MustCompile(`eyJ[A-Za-z0-9_-]{8,}\.[A-Za-z0-9_-]{8,}\.[A-Za-z0-9_-]{4,}`),
	// Bearer token (case-insensitive keyword followed by the credential).
	regexp.MustCompile(`(?i)bearer\s+[A-Za-z0-9._-]{16,}`),
}

// redactMark is the placeholder text that replaces a redacted secret.
const redactMark = "[已脱敏]"
// RedactSecrets 把文本里疑似密钥/令牌替换为脱敏标记,返回脱敏后文本与命中次数。
// 输出护栏:在 token 发射前对每个分片调用(流式无法回收已发,故逐片脱敏 + 最终标记)。
func RedactSecrets(s string) (string, int) {
n := 0
for _, re := range secretPatterns {
s = re.ReplaceAllStringFunc(s, func(string) string {
n++
return redactMark
})
}
return s, n
}
@@ -0,0 +1,39 @@
package harness
import (
"strings"
"testing"
)
// TestRedactSecrets checks that each supported secret shape is redacted:
// the hit count matches, the secret text is gone, and the mark is present.
func TestRedactSecrets(t *testing.T) {
	type scenario struct {
		in       string
		wantN    int
		mustGone string // substring that must no longer appear after redaction
	}
	table := []scenario{
		{in: "我的 key 是 sk-912cf85b16d04b22bcb95f4576423bfb 别外传", wantN: 1, mustGone: "sk-912cf85b16d04b22bcb95f4576423bfb"},
		{in: "token: AKIAIOSFODNN7EXAMPLE", wantN: 1, mustGone: "AKIAIOSFODNN7EXAMPLE"},
		{in: "Authorization: Bearer abcdef0123456789ABCDEF", wantN: 1, mustGone: "abcdef0123456789ABCDEF"},
		{in: "jwt eyJhbGciOiJIUzI1NiJ9.eyJzdWIiOiIxIn0.abcd1234 完事", wantN: 1, mustGone: "eyJhbGciOiJIUzI1NiJ9"},
	}
	for i := range table {
		tc := table[i]
		got, hits := RedactSecrets(tc.in)
		if hits != tc.wantN {
			t.Errorf("RedactSecrets(%q) 命中数=%d want %d", tc.in, hits, tc.wantN)
		}
		if leaked := strings.Contains(got, tc.mustGone); leaked {
			t.Errorf("脱敏后仍含密钥: %q", got)
		}
		if marked := strings.Contains(got, redactMark); !marked {
			t.Errorf("应含脱敏标记: %q", got)
		}
	}
}
// TestRedactSecrets_Clean checks that text without any secret passes through
// unchanged and reports zero hits.
func TestRedactSecrets_Clean(t *testing.T) {
	const plain = "这是一段正常的回答,介绍杭州西湖的历史与十景。"
	got, hits := RedactSecrets(plain)
	if hits == 0 && got == plain {
		return
	}
	t.Errorf("正常文本不应被改动: n=%d out=%q", hits, got)
}