Files
sundynix-agentix/sundynix-dispatcher/internal/llm/pool.go
T
Blizzard 3c65189f30 feat: 配置控制面 + LLM Pool 接第三方在线 API (OpenAI 兼容)
后端从占位回显变为真实生成:管理员经控制面登记/激活模型,Gateway 经 NATS
下发,Dispatcher 热更新 LLM Pool,Eino 图用 OpenAI 兼容流式真实推理。

- shared: contract.ModelConfig(provider/base_url/api_key/model) + 配置 subjects;
  bus.RequestModelConfig/ServeModelConfig/Publish/Subscribe ModelConfigUpdated
- gateway: store.LLMModel→sundynix_model(AutoMigrate,唯一激活) + admin REST
  (GET/POST/active/delete/test models, api_key 脱敏) + main ServeModelConfig +
  变更广播; 路由 /api/v1/admin/models*
- dispatcher: llm.Pool OpenAI 兼容 SSE 流式客户端(ChatStream) + 热更新配置 +
  未配置则降级桩; poolModel.Ready()?真实流式:注入记忆的桩; main 取配置+订阅
- 开发期接在线 API 不拉本地模型(见 llm-provider-strategy memory)
- 验证: 4 模块 build✓ + e2e PASS; mock OpenAI 服务 live 跑通——登记/测试连接✓/
  激活→NATS 热更新→提交→真实 SSE 流出 mock 回复, mock 日志证明端点被调用且
  注入画像(老王)进了模型上下文

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-10 15:41:39 +08:00

149 lines
3.7 KiB
Go
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
// Package llm abstracts the LLM pool (vLLM / Ollama / third-party online APIs): load balancing and streaming inference.
package llm
import (
"bufio"
"bytes"
"context"
"encoding/json"
"fmt"
"net/http"
"strings"
"sync"
"time"
"github.com/sundynix/sundynix-shared/contract"
)
// ChatMessage is a single message in a conversation.
type ChatMessage struct {
	// Role identifies the speaker: system, user or assistant.
	Role string `json:"role"`
	// Content is the text of the message.
	Content string `json:"content"`
}
// Pool keeps the currently active backend configuration (pushed by the
// control plane over NATS and hot-swappable) together with the HTTP client
// used to reach that backend.
type Pool struct {
	mu  sync.RWMutex          // guards cfg
	cfg *contract.ModelConfig // active backend settings; nil until SetConfig stores one
	hc  *http.Client          // client used for backend requests
}
// NewPool returns a Pool with no backend configured yet and an HTTP client
// whose overall timeout is 120 seconds.
//
// NOTE(review): http.Client.Timeout also bounds the time spent reading the
// response body, so a streamed completion that runs longer than 120 seconds
// is cut off — confirm this is intended.
func NewPool() *Pool {
	client := &http.Client{Timeout: 120 * time.Second}
	return &Pool{hc: client}
}
// SetConfig swaps in a new backend configuration; it is meant to be called
// whenever the control plane reports a change. Passing nil clears the
// configuration without logging anything.
func (p *Pool) SetConfig(cfg *contract.ModelConfig) {
	p.mu.Lock()
	p.cfg = cfg
	p.mu.Unlock()

	if cfg == nil {
		return
	}
	// The API key is deliberately kept out of the log line.
	fmt.Printf("[llm] model config set: provider=%s base=%s model=%s\n", cfg.Provider, cfg.BaseURL, cfg.Model)
}
// config returns the configuration pointer currently stored in the pool,
// read under the read lock. The result is nil when nothing has been set.
func (p *Pool) config() *contract.ModelConfig {
	p.mu.RLock()
	current := p.cfg
	p.mu.RUnlock()
	return current
}
// Ready reports whether a usable backend is configured, as decided by the
// current configuration's own Ready method.
func (p *Pool) Ready() bool {
	// NOTE(review): cfg may be nil here; this assumes ModelConfig.Ready
	// tolerates a nil receiver — confirm against the contract package.
	cfg := p.config()
	return cfg.Ready()
}
// ChatStream runs a streaming chat completion against the configured backend
// using the OpenAI-compatible protocol and calls onToken once for every
// non-empty content delta, in arrival order.
//
// It is only usable when the current configuration reports Ready; callers are
// expected to check Ready first and fall back to the stub otherwise.
//
// An error is returned when the configuration is not ready, when the request
// cannot be encoded, built or sent, when the backend answers with a non-2xx
// status (the message carries at most the first 8 KiB of the response body),
// or when reading the stream fails (including a single line longer than
// 1 MiB). Cancelling ctx aborts the request.
func (p *Pool) ChatStream(ctx context.Context, msgs []ChatMessage, onToken func(string)) error {
	cfg := p.config()
	if !cfg.Ready() {
		return fmt.Errorf("no model configured")
	}

	body, err := json.Marshal(map[string]any{
		"model":    cfg.Model,
		"messages": msgs,
		"stream":   true,
	})
	if err != nil {
		return fmt.Errorf("llm encode request: %w", err)
	}

	// Accept a base URL registered with or without a trailing slash so the
	// endpoint never ends up as ".../v1//chat/completions".
	endpoint := strings.TrimRight(cfg.BaseURL, "/") + "/chat/completions"
	req, err := http.NewRequestWithContext(ctx, http.MethodPost, endpoint, bytes.NewReader(body))
	if err != nil {
		return fmt.Errorf("llm build request: %w", err)
	}
	req.Header.Set("Content-Type", "application/json")
	if cfg.APIKey != "" {
		req.Header.Set("Authorization", "Bearer "+cfg.APIKey)
	}

	resp, err := p.hc.Do(req)
	if err != nil {
		return fmt.Errorf("llm request: %w", err)
	}
	defer resp.Body.Close()

	// Anything outside 2xx is not an event stream; report it instead of
	// silently parsing it as an empty completion.
	if resp.StatusCode < 200 || resp.StatusCode >= 300 {
		// Only a bounded prefix of the body goes into the error so a
		// misbehaving endpoint cannot make us buffer an arbitrarily large
		// reply. Peek returns whatever was available together with an error
		// when the body is shorter than the limit; that error is expected
		// and intentionally ignored.
		const maxErrBody = 8 << 10
		snippet, _ := bufio.NewReaderSize(resp.Body, maxErrBody).Peek(maxErrBody)
		return fmt.Errorf("llm http %d: %s", resp.StatusCode, strings.TrimSpace(string(snippet)))
	}

	// Parse the OpenAI-compatible SSE stream:
	//   data: {"choices":[{"delta":{"content":"..."}}]}
	//   ...
	//   data: [DONE]
	sc := bufio.NewScanner(resp.Body)
	sc.Buffer(make([]byte, 0, 64*1024), 1024*1024)
	for sc.Scan() {
		line := strings.TrimSpace(sc.Text())
		if !strings.HasPrefix(line, "data:") {
			// Blank separators, comments and other SSE fields carry no tokens.
			continue
		}
		payload := strings.TrimSpace(strings.TrimPrefix(line, "data:"))
		if payload == "[DONE]" {
			break
		}

		var chunk struct {
			Choices []struct {
				Delta struct {
					Content string `json:"content"`
				} `json:"delta"`
			} `json:"choices"`
		}
		if json.Unmarshal([]byte(payload), &chunk) != nil {
			// Best effort: a chunk that does not decode is skipped rather
			// than aborting the whole stream.
			continue
		}
		if len(chunk.Choices) > 0 && chunk.Choices[0].Delta.Content != "" {
			onToken(chunk.Choices[0].Delta.Content)
		}
	}
	return sc.Err()
}
// ---- Placeholder fallback (used when no backend is configured) ----

// Pacing of the placeholder stream, chosen to imitate a real backend.
const (
	// timeToFirstToken is the delay before the first token is emitted (TTFT).
	timeToFirstToken = 700 * time.Millisecond
	// interTokenDelay is the gap between consecutive tokens.
	interTokenDelay = 60 * time.Millisecond
)
// StreamText replays text through onToken at a fixed pace; it is the fallback
// stub used when no real backend is configured.
//
// It waits timeToFirstToken, then emits the tokens produced by tokenize one
// at a time with interTokenDelay between consecutive tokens. Both waits are
// interrupted by ctx: as soon as ctx is done, StreamText stops and returns
// ctx.Err() without emitting further tokens. No delay follows the last token.
// It returns nil once every token has been delivered.
func (p *Pool) StreamText(ctx context.Context, text string, onToken func([]byte)) error {
	timer := time.NewTimer(timeToFirstToken)
	defer timer.Stop()

	// pause blocks until the timer fires or ctx is done, whichever is first.
	pause := func() error {
		select {
		case <-ctx.Done():
			return ctx.Err()
		case <-timer.C:
			return nil
		}
	}

	if err := pause(); err != nil {
		return err
	}
	for i, tok := range tokenize(text) {
		if i > 0 {
			// The previous pause drained timer.C, so Reset is safe here.
			timer.Reset(interTokenDelay)
			if err := pause(); err != nil {
				return err
			}
		}
		// select picks at random when the timer and ctx are both ready;
		// re-check so a cancelled context never yields another token.
		if err := ctx.Err(); err != nil {
			return err
		}
		onToken([]byte(tok))
	}
	return nil
}
// tokenize splits s into one string per Unicode code point, in order.
// Bytes that are not valid UTF-8 each become the replacement character
// U+FFFD. The result is never nil; it is empty for an empty input.
func tokenize(s string) []string {
	runes := []rune(s)
	// Capacity is the byte length of s, an upper bound on the rune count.
	tokens := make([]string, 0, len(s))
	for i := range runes {
		tokens = append(tokens, string(runes[i]))
	}
	return tokens
}