// sundynix-agentix/sundynix-gateway/internal/guardrail/guardrail.go

// Package guardrail 实现 Harness 输入护栏的纯检测逻辑（与 HTTP 解耦，便于单测）。
package guardrail

import (
	"regexp"
	"strings"
)

// MaxJSONBytes is the upper bound, in bytes, on a JSON request body (256 KiB).
// File uploads are sent as multipart and are not subject to this check.
const MaxJSONBytes = 256 << 10

// injectionPatterns lists the regular expressions that are treated as signs of
// prompt injection or privilege escalation. Each entry pairs a compiled pattern
// with the category label that Inspect embeds in its block reason. The English
// patterns carry (?i) and are case-insensitive.
var injectionPatterns = []struct {
	label string         // category name reported in the block reason
	re    *regexp.Regexp // compiled once at package initialization
}{
	// "Ignore the earlier instructions" phrasing, in English and Chinese.
	{"忽略既定指令", regexp.MustCompile(`(?i)ignore\s+(all\s+|the\s+)*previous\s+(instructions?|prompts?)`)},
	{"忽略既定指令", regexp.MustCompile(`(?i)disregard\s+(the\s+)?(above|previous|prior)`)},
	{"忽略既定指令", regexp.MustCompile(`忽略(以上|之前|前面|上述|先前)[^。\n]{0,8}(指令|指示|提示|要求|规则|设定)`)},
	// Role reassignment. The trailing \b stops the short alternatives from
	// matching as word prefixes, so "you are now able to ..." is not flagged;
	// \s+ inside "no longer" mirrors the whitespace handling of the other words.
	{"角色越权", regexp.MustCompile(`(?i)you\s+are\s+now\s+(a|an|the|no\s+longer)\b`)},
	// Attempts to make the model disclose its prompt, in English and Chinese.
	{"诱导泄露提示词", regexp.MustCompile(`(?i)(reveal|show|print|repeat|expose)\s+(me\s+)?(your\s+|the\s+)*(system\s+)?prompt`)},
	{"诱导泄露提示词", regexp.MustCompile(`(泄露|显示|输出|告诉我|重复)[^。\n]{0,6}(系统)?(提示词|指令)`)},
}

// bannedTerms is the blocklist of sensitive terms. It is empty by default and
// is meant to be extended in place; Inspect blocks any body that contains a
// non-empty entry.
var bannedTerms = []string{
	// add blocked terms here, one per line
}

// Inspect reports whether body trips an input guardrail.
//
// Checks run in this order and the first hit wins:
//  1. size: a body longer than MaxJSONBytes is rejected;
//  2. injection: any regexp in injectionPatterns matches the body;
//  3. banned terms: any non-empty entry of bannedTerms occurs in the body.
//
// When blocked is true, reason is a human-readable explanation of the hit.
// Otherwise reason is empty and blocked is false.
//
// NOTE(review): matching is done on the raw bytes as received. Text that is
// JSON-escaped inside the body (for example \uXXXX sequences) is not decoded
// before matching — confirm whether callers normalize the payload first.
func Inspect(body []byte) (reason string, blocked bool) {
	if len(body) > MaxJSONBytes {
		return "请求体过大（超过 256KB）", true
	}
	// Match on the byte slice directly so the body is not copied on every call.
	for _, p := range injectionPatterns {
		if p.re.Match(body) {
			return "疑似提示词注入（" + p.label + "）", true
		}
	}
	if len(bannedTerms) == 0 {
		return "", false
	}
	// strings.Contains needs a string; pay for the conversion only when there
	// are terms to look for.
	text := string(body)
	for _, term := range bannedTerms {
		if term != "" && strings.Contains(text, term) {
			return "命中敏感词", true
		}
	}
	return "", false
}