feat(gateway): 可观测性 —— Prometheus 指标 + 结构化日志 + 探针
往"生产可运维"推一步(网关前门):
- Prometheus /metrics:sundynix_http_requests_total{method,route,status} 计数器、
sundynix_http_request_duration_seconds{method,route} 直方图、sundynix_http_requests_in_flight 仪表。
route 用 c.FullPath() 路由模板(/tasks/:id/...),避免按真实路径产生高基数;未匹配到路由时统一记为 "unmatched"。
- 结构化访问日志:slog JSON 到 stderr(request_id/method/route/path/status/latency_ms/
ip/uid/bytes),替代 gin 默认文本日志;改用 gin.New()+Recovery,自行管理中间件链。
- RequestID 中间件:生成/透传 X-Request-ID,写上下文+响应头,供日志关联。
- 探针:/healthz(liveness,不查依赖)、/readyz(readiness,DB+Redis 就绪才 200,
否则 503),供 k8s 等导流判断;/api/v1/health 深度聚合保留。
- 三个根端点未挂在需鉴权的路由组下;但它们注册在 r.Use(...) 之后,仍会经过全局中间件(RateLimit/Auth/Guardrail,其中 Auth 为非阻断)。/metrics 生产应由网络层限制抓取来源。
验证:单测(计数 +1 / X-Request-ID 生成与透传);实跑 /healthz 200、/readyz 200
(db,redis ready)、/metrics 输出真实指标、访问日志 JSON 正常、X-Request-ID 回写。
Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
@@ -95,6 +95,22 @@ func (h *Handler) StreamTask(c *gin.Context) {
|
||||
})
|
||||
}
|
||||
|
||||
// Healthz: GET /healthz —— 存活探针(liveness):进程能应答即 200,不查依赖。
|
||||
func (h *Handler) Healthz(c *gin.Context) {
|
||||
c.JSON(http.StatusOK, gin.H{"status": "ok"})
|
||||
}
|
||||
|
||||
// Readyz: GET /readyz —— 就绪探针(readiness):核心依赖(DB/Redis)可用才 200,否则 503。
|
||||
// 供 k8s 等编排器在依赖未就绪时暂不导流。NATS 在启动时即连(连不上会 fatal),故不单列。
|
||||
func (h *Handler) Readyz(c *gin.Context) {
|
||||
deps := gin.H{"db": h.db.Enabled(), "redis": h.cache.Enabled()}
|
||||
if h.db.Enabled() && h.cache.Enabled() {
|
||||
c.JSON(http.StatusOK, gin.H{"status": "ready", "deps": deps})
|
||||
return
|
||||
}
|
||||
c.JSON(http.StatusServiceUnavailable, gin.H{"status": "not_ready", "deps": deps})
|
||||
}
|
||||
|
||||
// Health: GET /api/v1/health —— 聚合各依赖子系统健康,供桌面端顶栏五盏灯实时点亮。
|
||||
// gateway/db/redis/nats 网关本地可判;milvus/neo4j 经 mcp-go health 工具取(不可用则置否)。
|
||||
func (h *Handler) Health(c *gin.Context) {
|
||||
|
||||
@@ -0,0 +1,94 @@
|
||||
package middleware
|
||||
|
||||
import (
|
||||
"crypto/rand"
|
||||
"encoding/hex"
|
||||
"log/slog"
|
||||
"os"
|
||||
"strconv"
|
||||
"time"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/prometheus/client_golang/prometheus"
|
||||
"github.com/prometheus/client_golang/prometheus/promauto"
|
||||
)
|
||||
|
||||
// CtxRequestID 是请求 ID 在 gin.Context 中的键。
|
||||
const CtxRequestID = "request_id"
|
||||
|
||||
// ---- Prometheus 指标 ----
|
||||
|
||||
var (
|
||||
httpRequests = promauto.NewCounterVec(prometheus.CounterOpts{
|
||||
Name: "sundynix_http_requests_total",
|
||||
Help: "HTTP 请求总数(按方法/路由模板/状态码)。",
|
||||
}, []string{"method", "route", "status"})
|
||||
|
||||
httpDuration = promauto.NewHistogramVec(prometheus.HistogramOpts{
|
||||
Name: "sundynix_http_request_duration_seconds",
|
||||
Help: "HTTP 请求耗时(秒)。",
|
||||
Buckets: prometheus.DefBuckets,
|
||||
}, []string{"method", "route"})
|
||||
|
||||
httpInFlight = promauto.NewGauge(prometheus.GaugeOpts{
|
||||
Name: "sundynix_http_requests_in_flight",
|
||||
Help: "当前处理中的 HTTP 请求数。",
|
||||
})
|
||||
)
|
||||
|
||||
// accessLogger 是结构化访问日志器(JSON 到 stderr)。
|
||||
var accessLogger = slog.New(slog.NewJSONHandler(os.Stderr, &slog.HandlerOptions{Level: slog.LevelInfo}))
|
||||
|
||||
// RequestID 为每个请求生成/透传 X-Request-ID,写入上下文与响应头,供日志关联。
|
||||
func RequestID() gin.HandlerFunc {
|
||||
return func(c *gin.Context) {
|
||||
id := c.GetHeader("X-Request-ID")
|
||||
if id == "" {
|
||||
id = newRequestID()
|
||||
}
|
||||
c.Set(CtxRequestID, id)
|
||||
c.Header("X-Request-ID", id)
|
||||
c.Next()
|
||||
}
|
||||
}
|
||||
|
||||
// Observe 记录 Prometheus 指标 + 结构化访问日志。放在中间件链较前位置。
|
||||
func Observe() gin.HandlerFunc {
|
||||
return func(c *gin.Context) {
|
||||
start := time.Now()
|
||||
httpInFlight.Inc()
|
||||
c.Next()
|
||||
httpInFlight.Dec()
|
||||
|
||||
route := c.FullPath() // 路由模板(/tasks/:id/...),避免按真实路径产生高基数
|
||||
if route == "" {
|
||||
route = "unmatched"
|
||||
}
|
||||
status := c.Writer.Status()
|
||||
dur := time.Since(start)
|
||||
method := c.Request.Method
|
||||
|
||||
httpRequests.WithLabelValues(method, route, strconv.Itoa(status)).Inc()
|
||||
httpDuration.WithLabelValues(method, route).Observe(dur.Seconds())
|
||||
|
||||
uid, _ := c.Get(CtxUserID)
|
||||
rid, _ := c.Get(CtxRequestID)
|
||||
accessLogger.Info("http",
|
||||
"request_id", rid,
|
||||
"method", method,
|
||||
"route", route,
|
||||
"path", c.Request.URL.Path,
|
||||
"status", status,
|
||||
"latency_ms", dur.Milliseconds(),
|
||||
"ip", c.ClientIP(),
|
||||
"uid", uid,
|
||||
"bytes", c.Writer.Size(),
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
// newRequestID returns a 16-character lowercase hexadecimal request ID built
// from 8 random bytes.
//
// If the system random source reports an error, the ID is derived from the
// current time in nanoseconds instead, so that requests still receive
// distinct IDs rather than a constant or all-zero value.
func newRequestID() string {
	var b [8]byte
	if _, err := rand.Read(b[:]); err != nil {
		// Fallback: big-endian encoding of the wall clock in nanoseconds.
		ns := uint64(time.Now().UnixNano())
		for i := range b {
			b[len(b)-1-i] = byte(ns >> (8 * uint(i)))
		}
	}
	return hex.EncodeToString(b[:])
}
|
||||
@@ -0,0 +1,48 @@
|
||||
package middleware
|
||||
|
||||
import (
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"testing"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/prometheus/client_golang/prometheus/testutil"
|
||||
)
|
||||
|
||||
func newEngine() *gin.Engine {
|
||||
gin.SetMode(gin.TestMode)
|
||||
r := gin.New()
|
||||
r.Use(RequestID(), Observe())
|
||||
r.GET("/ping", func(c *gin.Context) { c.String(http.StatusOK, "pong") })
|
||||
return r
|
||||
}
|
||||
|
||||
func TestObserve_CountsAndRequestID(t *testing.T) {
|
||||
r := newEngine()
|
||||
before := testutil.ToFloat64(httpRequests.WithLabelValues("GET", "/ping", "200"))
|
||||
|
||||
w := httptest.NewRecorder()
|
||||
r.ServeHTTP(w, httptest.NewRequest(http.MethodGet, "/ping", nil))
|
||||
|
||||
if w.Code != 200 {
|
||||
t.Fatalf("状态码=%d", w.Code)
|
||||
}
|
||||
if w.Header().Get("X-Request-ID") == "" {
|
||||
t.Error("应自动生成并回写 X-Request-ID")
|
||||
}
|
||||
after := testutil.ToFloat64(httpRequests.WithLabelValues("GET", "/ping", "200"))
|
||||
if after != before+1 {
|
||||
t.Errorf("请求计数应 +1:before=%v after=%v", before, after)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRequestID_PropagatesIncoming(t *testing.T) {
|
||||
r := newEngine()
|
||||
w := httptest.NewRecorder()
|
||||
req := httptest.NewRequest(http.MethodGet, "/ping", nil)
|
||||
req.Header.Set("X-Request-ID", "trace-abc-123")
|
||||
r.ServeHTTP(w, req)
|
||||
if got := w.Header().Get("X-Request-ID"); got != "trace-abc-123" {
|
||||
t.Errorf("应透传入站 X-Request-ID,got %q", got)
|
||||
}
|
||||
}
|
||||
@@ -5,6 +5,7 @@ import (
|
||||
"os"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/prometheus/client_golang/prometheus/promhttp"
|
||||
|
||||
"github.com/sundynix/sundynix-gateway/internal/blob"
|
||||
"github.com/sundynix/sundynix-gateway/internal/handler"
|
||||
@@ -15,13 +16,22 @@ import (
|
||||
|
||||
// New 构建带有 Guardrail / 限流中间件的 Gin 引擎。
|
||||
func New(db *store.Postgres, cache *store.Redis, bus *nats.Bus, blobStore *blob.Store) *gin.Engine {
|
||||
r := gin.Default()
|
||||
r.Use(cors()) // 桌面端/浏览器跨源访问(开发期放开)
|
||||
r := gin.New()
|
||||
r.Use(gin.Recovery()) // panic 兜底
|
||||
r.Use(middleware.RequestID()) // 生成/透传 X-Request-ID(日志关联)
|
||||
r.Use(middleware.Observe()) // Prometheus 指标 + 结构化访问日志(替代 gin 默认文本日志)
|
||||
r.Use(cors()) // 桌面端/浏览器跨源访问
|
||||
r.Use(middleware.RateLimit(cache))
|
||||
r.Use(middleware.Auth()) // 解析 Bearer JWT,注入已验证 userID(非阻断)
|
||||
r.Use(middleware.Guardrail()) // Harness: Input Guardrail
|
||||
|
||||
h := handler.New(db, cache, bus, blobStore)
|
||||
|
||||
// 可观测性根端点:Prometheus 抓取 + k8s 存活/就绪探针(不挂业务中间件鉴权)。
|
||||
r.GET("/metrics", gin.WrapH(promhttp.Handler()))
|
||||
r.GET("/healthz", h.Healthz)
|
||||
r.GET("/readyz", h.Readyz)
|
||||
|
||||
api := r.Group("/api/v1")
|
||||
{
|
||||
// —— 公开:鉴权端点 / 健康 / 按 task_id 寻址的 SSE 与导出(EventSource/下载无法带 Bearer)——
|
||||
|
||||
Reference in New Issue
Block a user