package main

import (
	"bufio"
	"bytes"
	"context"
	"crypto/tls"
	"encoding/json"
	"flag"
	"fmt"
	"io"
	"log"
	"math/rand"
	"net/http"
	"os"
	"os/signal"
	"strings"
	"sync"
	"sync/atomic"
	"syscall"
	"time"
)

// WorkerPool manages a fixed set of goroutines that consume tasks from a queue.
type WorkerPool struct {
	taskQueue       chan *Task
	workerCount     int
	shutdownChannel chan struct{}
	wg              sync.WaitGroup
}

// Task carries everything a worker needs to process one request.
type Task struct {
	r          *http.Request
	w          http.ResponseWriter
	done       chan struct{}
	reqID      string
	isStream   bool
	hunyuanReq HunyuanRequest
}

// NewWorkerPool creates and starts a new worker pool.
func NewWorkerPool(workerCount int, queueSize int) *WorkerPool {
	pool := &WorkerPool{
		taskQueue:       make(chan *Task, queueSize),
		workerCount:     workerCount,
		shutdownChannel: make(chan struct{}),
	}
	pool.Start()
	return pool
}

// Start launches the worker goroutines.
func (pool *WorkerPool) Start() {
	for i := 0; i < pool.workerCount; i++ {
		pool.wg.Add(1)
		go func(workerID int) {
			defer pool.wg.Done()
			logInfo("Worker %d started", workerID)
			for {
				select {
				case task, ok := <-pool.taskQueue:
					if !ok {
						// The queue has been closed; exit this worker.
						logInfo("Worker %d: task queue closed, exiting", workerID)
						return
					}
					logDebug("Worker %d processing task reqID:%s", workerID, task.reqID)
					// Process the task.
					if task.isStream {
						if err := handleStreamingRequest(task.w, task.r, task.hunyuanReq, task.reqID); err != nil {
							logError("Worker %d failed to handle streaming task: %v", workerID, err)
						}
					} else {
						if err := handleNonStreamingRequest(task.w, task.r, task.hunyuanReq, task.reqID); err != nil {
							logError("Worker %d failed to handle non-streaming task: %v", workerID, err)
						}
					}
					// Signal task completion.
					close(task.done)
				case <-pool.shutdownChannel:
					// Shutdown signal received; exit this worker.
					logInfo("Worker %d received shutdown signal, exiting", workerID)
					return
				}
			}
		}(i)
	}
}

// SubmitTask submits a task to the pool without blocking.
func (pool *WorkerPool) SubmitTask(task *Task) (bool, error) {
	select {
	case pool.taskQueue <- task:
		// Task successfully enqueued.
		return true, nil
	default:
		// Queue is full.
		return false, fmt.Errorf("task queue is full")
	}
}

// Shutdown stops the worker pool.
func (pool *WorkerPool) Shutdown() {
	logInfo("Shutting down worker pool...")
	// Signal all workers to stop.
	close(pool.shutdownChannel)
	// Wait for all workers to exit.
	pool.wg.Wait()
	// Close the task queue.
	close(pool.taskQueue)
	logInfo("Worker pool shut down")
}

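// Note: tasks still queued when Shutdown is called are never processed, so
// their done channels are never closed; the submitting handlers only unblock
// via their request-context timeout. A stricter shutdown would drain the
// queue and close each remaining task's done channel before returning.
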
// Semaphore is a channel-based counting semaphore used to cap concurrency.
type Semaphore struct {
	sem chan struct{}
}

// NewSemaphore creates a semaphore with the given capacity.
func NewSemaphore(size int) *Semaphore {
	return &Semaphore{
		sem: make(chan struct{}, size),
	}
}

// Acquire takes a slot, blocking until one is available.
func (s *Semaphore) Acquire() {
	s.sem <- struct{}{}
}

// Release returns a slot.
func (s *Semaphore) Release() {
	<-s.sem
}

// TryAcquire takes a slot without blocking; it reports whether it succeeded.
func (s *Semaphore) TryAcquire() bool {
	select {
	case s.sem <- struct{}{}:
		return true
	default:
		return false
	}
}

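// Typical usage, mirroring the /v1/chat/completions handler below:
//
//	if !requestSem.TryAcquire() {
//		http.Error(w, "Server is busy", http.StatusServiceUnavailable)
//		return
//	}
//	defer requestSem.Release()
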
// Config holds the command-line configuration.
type Config struct {
	Port          string // proxy listen port
	Address       string // proxy listen address
	LogLevel      string // log level
	DevMode       bool   // development-mode flag
	MaxRetries    int    // maximum number of retries
	Timeout       int    // request timeout in seconds
	VerifySSL     bool   // whether to verify SSL certificates
	ModelName     string // default model name
	BearerToken   string // bearer token (a public token is provided by default)
	WorkerCount   int    // number of workers in the pool
	QueueSize     int    // task queue size
	MaxConcurrent int    // maximum number of concurrent requests
}

// SupportedModels lists the models this proxy accepts.
var SupportedModels = []string{
	"hunyuan-t1-latest",
	"hunyuan-turbos-latest",
}

// Tencent Hunyuan API target URL and proxy version.
const (
	TargetURL = "https://llm.hunyuan.tencent.com/aide/api/v2/triton_image/demo_text_chat/"
	Version   = "1.0.0"
)

// Log levels.
const (
	LogLevelDebug = "debug"
	LogLevelInfo  = "info"
	LogLevelWarn  = "warn"
	LogLevelError = "error"
)

// parseFlags parses the command-line arguments and returns a Config.
func parseFlags() *Config {
	cfg := &Config{}
	flag.StringVar(&cfg.Port, "port", "6666", "Port to listen on")
	flag.StringVar(&cfg.Address, "address", "localhost", "Address to listen on")
	flag.StringVar(&cfg.LogLevel, "log-level", LogLevelInfo, "Log level (debug, info, warn, error)")
	flag.BoolVar(&cfg.DevMode, "dev", false, "Enable development mode with enhanced logging")
	flag.IntVar(&cfg.MaxRetries, "max-retries", 3, "Maximum number of retries for failed requests")
	flag.IntVar(&cfg.Timeout, "timeout", 300, "Request timeout in seconds")
	flag.BoolVar(&cfg.VerifySSL, "verify-ssl", true, "Verify SSL certificates")
	flag.StringVar(&cfg.ModelName, "model", "hunyuan-t1-latest", "Default Hunyuan model name")
	flag.StringVar(&cfg.BearerToken, "token", "7auGXNATFSKl7dF", "Bearer token for Hunyuan API")
	flag.IntVar(&cfg.WorkerCount, "workers", 50, "Number of worker goroutines in the pool")
	flag.IntVar(&cfg.QueueSize, "queue-size", 500, "Size of the task queue")
	flag.IntVar(&cfg.MaxConcurrent, "max-concurrent", 100, "Maximum number of concurrent requests")
	flag.Parse()
	// Development mode forces the log level to debug.
	if cfg.DevMode && cfg.LogLevel != LogLevelDebug {
		cfg.LogLevel = LogLevelDebug
		fmt.Println("Development mode enabled; log level set to debug")
	}
	return cfg
}

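// Example invocation (all flags optional; defaults are shown in parseFlags):
//
//	go run . -address 0.0.0.0 -port 6666 -workers 50 -queue-size 500 \
//		-max-concurrent 100 -timeout 300 -log-level debug
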
// Global configuration.
var (
	appConfig *Config
)

// Performance metrics (updated atomically).
var (
	requestCounter   int64
	successCounter   int64
	errorCounter     int64
	avgResponseTime  int64     // accumulated response time in ms; divided by request count on read
	latencyHistogram [10]int64 // 100ms buckets: 0-100ms, 100-200ms, ..., 900ms and above
	queuedRequests   int64     // requests currently in the queue
	rejectedRequests int64     // requests rejected due to overload
)

// Concurrency-control components.
var (
	workerPool *WorkerPool // worker pool
	requestSem *Semaphore  // request semaphore
)

// Logger state.
var (
	logger   *log.Logger
	logLevel string
	logMutex sync.Mutex
)

// initLogger initializes the logger.
func initLogger(level string) {
	logger = log.New(os.Stdout, "[HunyuanAPI] ", log.LstdFlags)
	logLevel = level
}

// Leveled logging helpers.
func logDebug(format string, v ...interface{}) {
	if logLevel == LogLevelDebug {
		logMutex.Lock()
		logger.Printf("[DEBUG] "+format, v...)
		logMutex.Unlock()
	}
}

func logInfo(format string, v ...interface{}) {
	if logLevel == LogLevelDebug || logLevel == LogLevelInfo {
		logMutex.Lock()
		logger.Printf("[INFO] "+format, v...)
		logMutex.Unlock()
	}
}

func logWarn(format string, v ...interface{}) {
	if logLevel == LogLevelDebug || logLevel == LogLevelInfo || logLevel == LogLevelWarn {
		logMutex.Lock()
		logger.Printf("[WARN] "+format, v...)
		logMutex.Unlock()
	}
}

func logError(format string, v ...interface{}) {
	logMutex.Lock()
	logger.Printf("[ERROR] "+format, v...)
	logMutex.Unlock()
	// Count the error.
	atomic.AddInt64(&errorCounter, 1)
}

// APIMessage is the OpenAI/DeepSeek message format.
type APIMessage struct {
	Role    string      `json:"role"`
	Content interface{} `json:"content"` // interface{} so both string and structured content are accepted
}

// APIRequest is the OpenAI/DeepSeek request format.
type APIRequest struct {
	Model       string       `json:"model"`
	Messages    []APIMessage `json:"messages"`
	Stream      bool         `json:"stream"`
	Temperature float64      `json:"temperature,omitempty"`
	MaxTokens   int          `json:"max_tokens,omitempty"`
}

// HunyuanRequest is the Tencent Hunyuan request format.
type HunyuanRequest struct {
	Stream            bool         `json:"stream"`
	Model             string       `json:"model"`
	QueryID           string       `json:"query_id"`
	Messages          []APIMessage `json:"messages"`
	StreamModeration  bool         `json:"stream_moderation"`
	EnableEnhancement bool         `json:"enable_enhancement"`
}

// HunyuanResponse is the Tencent Hunyuan response format.
type HunyuanResponse struct {
	ID                string   `json:"id"`
	Object            string   `json:"object"`
	Created           int64    `json:"created"`
	Model             string   `json:"model"`
	SystemFingerprint string   `json:"system_fingerprint"`
	Choices           []Choice `json:"choices"`
	Note              string   `json:"note,omitempty"`
}

// Choice is a single choice within a response.
type Choice struct {
	Index        int     `json:"index"`
	Delta        Delta   `json:"delta"`
	FinishReason *string `json:"finish_reason"`
}

// Delta holds incremental content and reasoning content.
type Delta struct {
	Role             string `json:"role,omitempty"`
	Content          string `json:"content,omitempty"`
	ReasoningContent string `json:"reasoning_content,omitempty"`
}

// StreamChunk is the DeepSeek-style streaming response format.
type StreamChunk struct {
	ID      string `json:"id"`
	Object  string `json:"object"`
	Created int64  `json:"created"`
	Model   string `json:"model"`
	Choices []struct {
		Index        int     `json:"index"`
		FinishReason *string `json:"finish_reason,omitempty"`
		Delta        struct {
			Role             string `json:"role,omitempty"`
			Content          string `json:"content,omitempty"`
			ReasoningContent string `json:"reasoning_content,omitempty"`
		} `json:"delta"`
	} `json:"choices"`
}

// CompletionResponse is the non-streaming response format.
type CompletionResponse struct {
	ID      string `json:"id"`
	Object  string `json:"object"`
	Created int64  `json:"created"`
	Model   string `json:"model"`
	Choices []struct {
		Index        int    `json:"index"`
		FinishReason string `json:"finish_reason"`
		Message      struct {
			Role             string `json:"role"`
			Content          string `json:"content"`
			ReasoningContent string `json:"reasoning_content,omitempty"`
		} `json:"message"`
	} `json:"choices"`
	Usage struct {
		PromptTokens     int `json:"prompt_tokens"`
		CompletionTokens int `json:"completion_tokens"`
		TotalTokens      int `json:"total_tokens"`
	} `json:"usage"`
}

// Request counter and mutex used for monitoring.
var (
	requestCount uint64 = 0
	countMutex   sync.Mutex
)

// main is the entry point.
func main() {
	// Parse the configuration.
	appConfig = parseFlags()
	// Initialize logging.
	initLogger(appConfig.LogLevel)
	logInfo("Starting service: TargetURL=%s, Address=%s, Port=%s, Version=%s, LogLevel=%s, SupportedModels=%v, BearerToken=***, WorkerCount=%d, QueueSize=%d, MaxConcurrent=%d",
		TargetURL, appConfig.Address, appConfig.Port, Version, appConfig.LogLevel, SupportedModels,
		appConfig.WorkerCount, appConfig.QueueSize, appConfig.MaxConcurrent)
	// Create the worker pool and the request semaphore.
	workerPool = NewWorkerPool(appConfig.WorkerCount, appConfig.QueueSize)
	requestSem = NewSemaphore(appConfig.MaxConcurrent)
	logInfo("Worker pool created: %d workers, queue size %d", appConfig.WorkerCount, appConfig.QueueSize)
	// Tune the default transport for higher concurrency.
	http.DefaultTransport.(*http.Transport).MaxIdleConnsPerHost = 100
	http.DefaultTransport.(*http.Transport).MaxIdleConns = 100
	http.DefaultTransport.(*http.Transport).IdleConnTimeout = 90 * time.Second
	// Custom server with timeouts suited to long-running requests.
	server := &http.Server{
		Addr:         appConfig.Address + ":" + appConfig.Port,
		ReadTimeout:  time.Duration(appConfig.Timeout) * time.Second,
		WriteTimeout: time.Duration(appConfig.Timeout) * time.Second,
		IdleTimeout:  120 * time.Second,
		Handler:      nil, // use the default ServeMux
	}
	// Model list endpoint.
	http.HandleFunc("/v1/models", func(w http.ResponseWriter, r *http.Request) {
		setCORSHeaders(w)
		if r.Method == "OPTIONS" {
			w.WriteHeader(http.StatusOK)
			return
		}
		handleModelsRequest(w, r)
	})
	// Chat completions endpoint.
	http.HandleFunc("/v1/chat/completions", func(w http.ResponseWriter, r *http.Request) {
		setCORSHeaders(w)
		if r.Method == "OPTIONS" {
			w.WriteHeader(http.StatusOK)
			return
		}
		// Bump the monitoring counter.
		countMutex.Lock()
		requestCount++
		currentCount := requestCount
		countMutex.Unlock()
		logInfo("Received new request #%d", currentCount)
		// Count the request.
		atomic.AddInt64(&requestCounter, 1)
		// Try to acquire the semaphore.
		if !requestSem.TryAcquire() {
			// Concurrency limit reached.
			atomic.AddInt64(&rejectedRequests, 1)
			logWarn("Request #%d rejected: concurrent request limit reached", currentCount)
			w.Header().Set("Retry-After", "30")
			http.Error(w, "Server is busy, please try again later", http.StatusServiceUnavailable)
			return
		}
		// Release the semaphore when the handler returns.
		defer requestSem.Release()
		// Handle the request.
		handleChatCompletionRequestWithPool(w, r, currentCount)
	})
	// Health-check endpoint.
	http.HandleFunc("/health", func(w http.ResponseWriter, r *http.Request) {
		setCORSHeaders(w)
		if r.Method == "OPTIONS" {
			w.WriteHeader(http.StatusOK)
			return
		}
		// Read the counters.
		reqCount := atomic.LoadInt64(&requestCounter)
		succCount := atomic.LoadInt64(&successCounter)
		errCount := atomic.LoadInt64(&errorCounter)
		queuedCount := atomic.LoadInt64(&queuedRequests)
		rejectedCount := atomic.LoadInt64(&rejectedRequests)
		// Compute the average response time.
		var avgTime int64 = 0
		if reqCount > 0 {
			avgTime = atomic.LoadInt64(&avgResponseTime) / max(reqCount, 1)
		}
		// Snapshot the latency histogram.
		histogram := make([]int64, 10)
		for i := 0; i < 10; i++ {
			histogram[i] = atomic.LoadInt64(&latencyHistogram[i])
		}
		// Build the response.
		stats := map[string]interface{}{
			"status":           "ok",
			"version":          Version,
			"requests":         reqCount,
			"success":          succCount,
			"errors":           errCount,
			"queued":           queuedCount,
			"rejected":         rejectedCount,
			"avg_time_ms":      avgTime,
			"histogram_ms":     histogram,
			"worker_count":     workerPool.workerCount,
			"queue_size":       len(workerPool.taskQueue),
			"queue_capacity":   cap(workerPool.taskQueue),
			"queue_percent":    float64(len(workerPool.taskQueue)) / float64(cap(workerPool.taskQueue)) * 100,
			"concurrent_limit": appConfig.MaxConcurrent,
		}
		w.Header().Set("Content-Type", "application/json")
		w.WriteHeader(http.StatusOK)
		json.NewEncoder(w).Encode(stats)
	})
	// Stop channel for shutdown signals.
	stop := make(chan os.Signal, 1)
	signal.Notify(stop, os.Interrupt, syscall.SIGTERM)
	// Start the server in a goroutine.
	go func() {
		logInfo("Starting proxy server on %s", server.Addr)
		if err := server.ListenAndServe(); err != nil && err != http.ErrServerClosed {
			logError("Failed to start server: %v", err)
			os.Exit(1)
		}
	}()
	// Wait for a stop signal.
	<-stop
	// Context for graceful shutdown.
	ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
	defer cancel()
	// Shut the server down gracefully.
	logInfo("Server is shutting down...")
	if err := server.Shutdown(ctx); err != nil {
		logError("Server shutdown failed: %v", err)
	}
	// Shut down the worker pool.
	workerPool.Shutdown()
	logInfo("Server gracefully stopped")
}

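// Quick smoke tests against a locally running instance (assuming the default
// address and port):
//
//	curl http://localhost:6666/health
//	curl http://localhost:6666/v1/models
//	curl http://localhost:6666/v1/chat/completions \
//		-H "Content-Type: application/json" \
//		-d '{"model":"hunyuan-t1-latest","stream":false,"messages":[{"role":"user","content":"Hello"}]}'
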
// setCORSHeaders sets permissive CORS headers.
func setCORSHeaders(w http.ResponseWriter) {
	w.Header().Set("Access-Control-Allow-Origin", "*")
	w.Header().Set("Access-Control-Allow-Methods", "POST, GET, OPTIONS")
	w.Header().Set("Access-Control-Allow-Headers", "Content-Type, Authorization")
}

// validateMessages checks the message list for basic validity.
func validateMessages(messages []APIMessage) (bool, string) {
	logDebug("Validating message format")
	if len(messages) == 0 {
		return false, "Messages array is required"
	}
	for _, msg := range messages {
		if msg.Role == "" || msg.Content == nil {
			return false, "Invalid message format: each message must have role and content"
		}
	}
	return true, ""
}

// extractToken extracts the bearer token from the request headers.
// (Currently unused: the proxy always forwards its configured BearerToken.)
func extractToken(r *http.Request) (string, error) {
	// Read the Authorization header.
	authHeader := r.Header.Get("Authorization")
	if authHeader == "" {
		return "", fmt.Errorf("missing Authorization header")
	}
	// Validate the format.
	if !strings.HasPrefix(authHeader, "Bearer ") {
		return "", fmt.Errorf("invalid Authorization header format, must start with 'Bearer '")
	}
	// Extract the token value.
	token := strings.TrimPrefix(authHeader, "Bearer ")
	if token == "" {
		return "", fmt.Errorf("empty token in Authorization header")
	}
	return token, nil
}

// contentToString converts message content of any type to a string.
func contentToString(content interface{}) string {
	if content == nil {
		return ""
	}
	switch v := content.(type) {
	case string:
		return v
	default:
		jsonBytes, err := json.Marshal(v)
		if err != nil {
			logWarn("Failed to convert content to JSON: %v", err)
			return ""
		}
		return string(jsonBytes)
	}
}

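// For example, plain strings pass through unchanged while structured content
// is re-serialized to JSON:
//
//	contentToString("hi")                            // "hi"
//	contentToString([]map[string]string{{"a": "b"}}) // `[{"a":"b"}]`
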
// generateQueryID generates a query ID for the upstream request.
func generateQueryID() string {
	return fmt.Sprintf("%s%d", getRandomString(8), time.Now().UnixNano())
}

// isModelSupported reports whether the model is in the supported list.
func isModelSupported(modelName string) bool {
	for _, supportedModel := range SupportedModels {
		if modelName == supportedModel {
			return true
		}
	}
	return false
}

// handleModelsRequest serves the model list.
func handleModelsRequest(w http.ResponseWriter, r *http.Request) {
	logInfo("Handling model list request")
	w.Header().Set("Content-Type", "application/json")
	w.WriteHeader(http.StatusOK)
	// Build the model entries.
	modelData := make([]map[string]interface{}, 0, len(SupportedModels))
	for _, model := range SupportedModels {
		modelData = append(modelData, map[string]interface{}{
			"id":       model,
			"object":   "model",
			"created":  time.Now().Unix(),
			"owned_by": "TencentCloud",
			"capabilities": map[string]interface{}{
				"chat":        true,
				"completions": true,
				"reasoning":   true,
			},
		})
	}
	modelsList := map[string]interface{}{
		"object": "list",
		"data":   modelData,
	}
	json.NewEncoder(w).Encode(modelsList)
}

// handleChatCompletionRequestWithPool handles a chat completion request via the worker pool.
func handleChatCompletionRequestWithPool(w http.ResponseWriter, r *http.Request, requestNum uint64) {
	reqID := generateRequestID()
	startTime := time.Now()
	logInfo("[reqID:%s] Handling chat completion request #%d", reqID, requestNum)
	// Attach a timeout context to the request.
	ctx, cancel := context.WithTimeout(r.Context(), time.Duration(appConfig.Timeout)*time.Second)
	defer cancel()
	r = r.WithContext(ctx)
	// Recover from panics so one bad request cannot kill the server.
	defer func() {
		if rec := recover(); rec != nil {
			logError("[reqID:%s] Panic while handling request: %v", reqID, rec)
			http.Error(w, "Internal server error", http.StatusInternalServerError)
		}
	}()
	// Parse the request body.
	var apiReq APIRequest
	if err := json.NewDecoder(r.Body).Decode(&apiReq); err != nil {
		logError("[reqID:%s] Failed to parse request: %v", reqID, err)
		http.Error(w, "Invalid request body", http.StatusBadRequest)
		return
	}
	// Validate the message format.
	valid, errMsg := validateMessages(apiReq.Messages)
	if !valid {
		logError("[reqID:%s] Message validation failed: %s", reqID, errMsg)
		http.Error(w, errMsg, http.StatusBadRequest)
		return
	}
	// Whether to stream the response.
	isStream := apiReq.Stream
	// Resolve the model to use.
	modelName := appConfig.ModelName
	if apiReq.Model != "" {
		// Use the requested model only if it is supported.
		if isModelSupported(apiReq.Model) {
			modelName = apiReq.Model
		} else {
			logWarn("[reqID:%s] Requested model %s is not supported; falling back to default model %s", reqID, apiReq.Model, modelName)
		}
	}
	logInfo("[reqID:%s] Using model: %s", reqID, modelName)
	// Build the Hunyuan API request.
	hunyuanReq := HunyuanRequest{
		Stream:            true, // the Hunyuan API always responds as a stream
		Model:             modelName,
		QueryID:           generateQueryID(),
		Messages:          apiReq.Messages,
		StreamModeration:  true,
		EnableEnhancement: false,
	}
	// Build the task.
	task := &Task{
		r:          r,
		w:          w,
		done:       make(chan struct{}),
		reqID:      reqID,
		isStream:   isStream,
		hunyuanReq: hunyuanReq,
	}
	// Enqueue the task.
	atomic.AddInt64(&queuedRequests, 1)
	submitted, err := workerPool.SubmitTask(task)
	if !submitted {
		atomic.AddInt64(&queuedRequests, -1)
		atomic.AddInt64(&rejectedRequests, 1)
		logError("[reqID:%s] Failed to submit task: %v", reqID, err)
		w.Header().Set("Retry-After", "60")
		http.Error(w, "Server queue is full, please try again later", http.StatusServiceUnavailable)
		return
	}
	logInfo("[reqID:%s] Task submitted to the queue", reqID)
	// Wait for the task to finish, or for the request to be canceled.
	select {
	case <-task.done:
		logInfo("[reqID:%s] Task completed", reqID)
	case <-r.Context().Done():
		logWarn("[reqID:%s] Request canceled or timed out", reqID)
		// Note: although the request is canceled, the worker may still be
		// processing the task.
	}
	// Update metrics.
	atomic.AddInt64(&queuedRequests, -1)
	elapsed := time.Since(startTime).Milliseconds()
	// Update the latency histogram.
	bucketIndex := min(int(elapsed/100), 9)
	atomic.AddInt64(&latencyHistogram[bucketIndex], 1)
	// Accumulate the response time.
	atomic.AddInt64(&avgResponseTime, elapsed)
	if r.Context().Err() == nil {
		atomic.AddInt64(&successCounter, 1)
		logInfo("[reqID:%s] Request handled successfully in %dms", reqID, elapsed)
	} else {
		logError("[reqID:%s] Request failed: %v, took %dms", reqID, r.Context().Err(), elapsed)
	}
}

// handleChatCompletionRequest is the original pool-less implementation, kept
// for reference but no longer used.
func handleChatCompletionRequest(w http.ResponseWriter, r *http.Request) {
	reqID := generateRequestID()
	startTime := time.Now()
	logInfo("[reqID:%s] Handling chat completion request", reqID)
	// Parse the request body.
	var apiReq APIRequest
	if err := json.NewDecoder(r.Body).Decode(&apiReq); err != nil {
		logError("[reqID:%s] Failed to parse request: %v", reqID, err)
		http.Error(w, "Invalid request body", http.StatusBadRequest)
		return
	}
	// Validate the message format.
	valid, errMsg := validateMessages(apiReq.Messages)
	if !valid {
		logError("[reqID:%s] Message validation failed: %s", reqID, errMsg)
		http.Error(w, errMsg, http.StatusBadRequest)
		return
	}
	// Whether to stream the response.
	isStream := apiReq.Stream
	// Resolve the model to use.
	modelName := appConfig.ModelName
	if apiReq.Model != "" {
		// Use the requested model only if it is supported.
		if isModelSupported(apiReq.Model) {
			modelName = apiReq.Model
		} else {
			logWarn("[reqID:%s] Requested model %s is not supported; falling back to default model %s", reqID, apiReq.Model, modelName)
		}
	}
	logInfo("[reqID:%s] Using model: %s", reqID, modelName)
	// Build the Hunyuan API request.
	hunyuanReq := HunyuanRequest{
		Stream:            true, // the Hunyuan API always responds as a stream
		Model:             modelName,
		QueryID:           generateQueryID(),
		Messages:          apiReq.Messages,
		StreamModeration:  true,
		EnableEnhancement: false,
	}
	// Forward the request to the Hunyuan API.
	var responseErr error
	if isStream {
		responseErr = handleStreamingRequest(w, r, hunyuanReq, reqID)
	} else {
		responseErr = handleNonStreamingRequest(w, r, hunyuanReq, reqID)
	}
	// Update metrics.
	elapsed := time.Since(startTime).Milliseconds()
	// Update the latency histogram.
	bucketIndex := min(int(elapsed/100), 9)
	atomic.AddInt64(&latencyHistogram[bucketIndex], 1)
	// Accumulate the response time.
	atomic.AddInt64(&avgResponseTime, elapsed)
	if responseErr == nil {
		atomic.AddInt64(&successCounter, 1)
		logInfo("[reqID:%s] Request handled successfully in %dms", reqID, elapsed)
	} else {
		logError("[reqID:%s] Request failed: %v, took %dms", reqID, responseErr, elapsed)
	}
}

// getHTTPClient returns an HTTP client, optionally skipping SSL verification.
func getHTTPClient() *http.Client {
	tr := &http.Transport{
		MaxIdleConnsPerHost: 100,
		IdleConnTimeout:     90 * time.Second,
		TLSClientConfig:     nil, // default configuration
	}
	// Skip certificate verification if configured to do so.
	if !appConfig.VerifySSL {
		tr.TLSClientConfig = &tls.Config{InsecureSkipVerify: true}
	}
	return &http.Client{
		Timeout:   time.Duration(appConfig.Timeout) * time.Second,
		Transport: tr,
	}
}

// handleStreamingRequest proxies a streaming request, translating the Hunyuan
// SSE stream into OpenAI-style chunks.
func handleStreamingRequest(w http.ResponseWriter, r *http.Request, hunyuanReq HunyuanRequest, reqID string) error {
	logInfo("[reqID:%s] Handling streaming request", reqID)
	// Serialize the upstream request.
	jsonData, err := json.Marshal(hunyuanReq)
	if err != nil {
		logError("[reqID:%s] Failed to serialize request: %v", reqID, err)
		http.Error(w, "Internal server error", http.StatusInternalServerError)
		return err
	}
	// Build the upstream request.
	httpReq, err := http.NewRequestWithContext(r.Context(), "POST", TargetURL, bytes.NewBuffer(jsonData))
	if err != nil {
		logError("[reqID:%s] Failed to create request: %v", reqID, err)
		http.Error(w, "Internal server error", http.StatusInternalServerError)
		return err
	}
	// Set the request headers.
	httpReq.Header.Set("Content-Type", "application/json")
	httpReq.Header.Set("Model", hunyuanReq.Model)
	setCommonHeaders(httpReq)
	// Send the request.
	client := getHTTPClient()
	resp, err := client.Do(httpReq)
	if err != nil {
		logError("[reqID:%s] Failed to send request: %v", reqID, err)
		http.Error(w, "Failed to connect to API", http.StatusBadGateway)
		return err
	}
	defer resp.Body.Close()
	// Check the response status.
	if resp.StatusCode != http.StatusOK {
		logError("[reqID:%s] API returned non-200 status code: %d", reqID, resp.StatusCode)
		bodyBytes, _ := io.ReadAll(resp.Body)
		logError("[reqID:%s] Error response body: %s", reqID, string(bodyBytes))
		http.Error(w, fmt.Sprintf("API error with status code: %d", resp.StatusCode), resp.StatusCode)
		return fmt.Errorf("API returned non-200 status code: %d", resp.StatusCode)
	}
	// Set SSE response headers.
	w.Header().Set("Content-Type", "text/event-stream")
	w.Header().Set("Cache-Control", "no-cache")
	w.Header().Set("Connection", "keep-alive")
	// Response ID and timestamp shared by all chunks.
	respID := fmt.Sprintf("chatcmpl-%s", getRandomString(10))
	createdTime := time.Now().Unix()
	// Buffered reader over the upstream stream.
	reader := bufio.NewReaderSize(resp.Body, 16384)
	// The writer must support flushing for streaming to work.
	flusher, ok := w.(http.Flusher)
	if !ok {
		logError("[reqID:%s] Streaming not supported", reqID)
		http.Error(w, "Streaming not supported", http.StatusInternalServerError)
		return fmt.Errorf("streaming not supported")
	}
	// Send the initial role chunk.
	roleChunk := createRoleChunk(respID, createdTime, hunyuanReq.Model)
	w.Write([]byte("data: " + string(roleChunk) + "\n\n"))
	flusher.Flush()
	// Read the upstream stream line by line.
	for {
		// Check for cancellation before each read.
		select {
		case <-r.Context().Done():
			logWarn("[reqID:%s] Request timed out or was canceled by the client", reqID)
			return fmt.Errorf("request timed out or was canceled")
		default:
			// Keep going.
		}
		// Read one line.
		line, err := reader.ReadBytes('\n')
		if err != nil {
			if err != io.EOF {
				logError("[reqID:%s] Error reading response: %v", reqID, err)
				return err
			}
			break
		}
		// Process a data line.
		lineStr := string(line)
		if strings.HasPrefix(lineStr, "data: ") {
			jsonStr := strings.TrimPrefix(lineStr, "data: ")
			jsonStr = strings.TrimSpace(jsonStr)
			// Handle the [DONE] sentinel.
			if jsonStr == "[DONE]" {
				logDebug("[reqID:%s] Received [DONE] message", reqID)
				w.Write([]byte("data: [DONE]\n\n"))
				flusher.Flush()
				break
			}
			// Parse the Hunyuan chunk.
			var hunyuanResp HunyuanResponse
			if err := json.Unmarshal([]byte(jsonStr), &hunyuanResp); err != nil {
				logWarn("[reqID:%s] Failed to parse JSON: %v, data: %s", reqID, err, jsonStr)
				continue
			}
			// Translate each choice into OpenAI-style chunks.
			for _, choice := range hunyuanResp.Choices {
				if choice.Delta.Content != "" {
					// Content chunk.
					contentChunk := createContentChunk(respID, createdTime, hunyuanReq.Model, choice.Delta.Content)
					w.Write([]byte("data: " + string(contentChunk) + "\n\n"))
					flusher.Flush()
				}
				if choice.Delta.ReasoningContent != "" {
					// Reasoning-content chunk.
					reasoningChunk := createReasoningChunk(respID, createdTime, hunyuanReq.Model, choice.Delta.ReasoningContent)
					w.Write([]byte("data: " + string(reasoningChunk) + "\n\n"))
					flusher.Flush()
				}
				// Forward the finish reason if present.
				if choice.FinishReason != nil {
					finishReason := *choice.FinishReason
					if finishReason != "" {
						doneChunk := createDoneChunk(respID, createdTime, hunyuanReq.Model, finishReason)
						w.Write([]byte("data: " + string(doneChunk) + "\n\n"))
						flusher.Flush()
					}
				}
			}
		}
	}
	// Send a final stop chunk and [DONE] in case the stream ended without one.
	finishReason := "stop"
	doneChunk := createDoneChunk(respID, createdTime, hunyuanReq.Model, finishReason)
	w.Write([]byte("data: " + string(doneChunk) + "\n\n"))
	w.Write([]byte("data: [DONE]\n\n"))
	flusher.Flush()
	return nil
}

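// The downstream wire format produced above follows the OpenAI streaming
// convention: each chunk is a single SSE event of the form
//
//	data: {"id":"chatcmpl-...","object":"chat.completion.chunk",...}
//
// followed by a blank line, and the stream terminates with "data: [DONE]".
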
// handleNonStreamingRequest proxies a non-streaming request by consuming the
// upstream stream in full and returning a single completion response.
func handleNonStreamingRequest(w http.ResponseWriter, r *http.Request, hunyuanReq HunyuanRequest, reqID string) error {
	logInfo("[reqID:%s] Handling non-streaming request", reqID)
	// Serialize the upstream request.
	jsonData, err := json.Marshal(hunyuanReq)
	if err != nil {
		logError("[reqID:%s] Failed to serialize request: %v", reqID, err)
		http.Error(w, "Internal server error", http.StatusInternalServerError)
		return err
	}
	// Build the upstream request.
	httpReq, err := http.NewRequestWithContext(r.Context(), "POST", TargetURL, bytes.NewBuffer(jsonData))
	if err != nil {
		logError("[reqID:%s] Failed to create request: %v", reqID, err)
		http.Error(w, "Internal server error", http.StatusInternalServerError)
		return err
	}
	// Set the request headers.
	httpReq.Header.Set("Content-Type", "application/json")
	httpReq.Header.Set("Model", hunyuanReq.Model)
	setCommonHeaders(httpReq)
	// Send the request.
	client := getHTTPClient()
	resp, err := client.Do(httpReq)
	if err != nil {
		logError("[reqID:%s] Failed to send request: %v", reqID, err)
		http.Error(w, "Failed to connect to API", http.StatusBadGateway)
		return err
	}
	defer resp.Body.Close()
	// Check the response status.
	if resp.StatusCode != http.StatusOK {
		logError("[reqID:%s] API returned non-200 status code: %d", reqID, resp.StatusCode)
		bodyBytes, _ := io.ReadAll(resp.Body)
		logError("[reqID:%s] Error response body: %s", reqID, string(bodyBytes))
		http.Error(w, fmt.Sprintf("API error with status code: %d", resp.StatusCode), resp.StatusCode)
		return fmt.Errorf("API returned non-200 status code: %d", resp.StatusCode)
	}
	// Read the full streaming response.
	bodyBytes, err := io.ReadAll(resp.Body)
	if err != nil {
		logError("[reqID:%s] Failed to read response: %v", reqID, err)
		http.Error(w, "Failed to read API response", http.StatusInternalServerError)
		return err
	}
	// Parse the stream and extract the accumulated content.
	fullContent, reasoningContent, err := extractFullContentFromStream(bodyBytes, reqID)
	if err != nil {
		logError("[reqID:%s] Failed to parse streaming response: %v", reqID, err)
		http.Error(w, "Failed to parse streaming response", http.StatusInternalServerError)
		return err
	}
	// Build the complete non-streaming response.
	completionResponse := CompletionResponse{
		ID:      fmt.Sprintf("chatcmpl-%s", getRandomString(10)),
		Object:  "chat.completion",
		Created: time.Now().Unix(),
		Model:   hunyuanReq.Model,
		Choices: []struct {
			Index        int    `json:"index"`
			FinishReason string `json:"finish_reason"`
			Message      struct {
				Role             string `json:"role"`
				Content          string `json:"content"`
				ReasoningContent string `json:"reasoning_content,omitempty"`
			} `json:"message"`
		}{
			{
				Index:        0,
				FinishReason: "stop",
				Message: struct {
					Role             string `json:"role"`
					Content          string `json:"content"`
					ReasoningContent string `json:"reasoning_content,omitempty"`
				}{
					Role:             "assistant",
					Content:          fullContent,
					ReasoningContent: reasoningContent,
				},
			},
		},
		Usage: struct {
			PromptTokens     int `json:"prompt_tokens"`
			CompletionTokens int `json:"completion_tokens"`
			TotalTokens      int `json:"total_tokens"`
		}{
			// Rough estimate: roughly four characters per token.
			PromptTokens:     len(formatMessages(hunyuanReq.Messages)) / 4,
			CompletionTokens: len(fullContent) / 4,
			TotalTokens:      (len(formatMessages(hunyuanReq.Messages)) + len(fullContent)) / 4,
		},
	}
	// Write the response.
	w.Header().Set("Content-Type", "application/json")
	if err := json.NewEncoder(w).Encode(completionResponse); err != nil {
		logError("[reqID:%s] Failed to encode response: %v", reqID, err)
		http.Error(w, "Failed to encode response", http.StatusInternalServerError)
		return err
	}
	return nil
}

// extractFullContentFromStream accumulates the content and reasoning content
// from a complete SSE stream body.
func extractFullContentFromStream(bodyBytes []byte, reqID string) (string, string, error) {
	bodyStr := string(bodyBytes)
	lines := strings.Split(bodyStr, "\n")
	// Content accumulators.
	var contentBuilder strings.Builder
	var reasoningBuilder strings.Builder
	// Parse each line.
	for _, line := range lines {
		if strings.HasPrefix(line, "data: ") && !strings.Contains(line, "[DONE]") {
			jsonStr := strings.TrimPrefix(line, "data: ")
			jsonStr = strings.TrimSpace(jsonStr)
			// Parse the JSON chunk.
			var hunyuanResp HunyuanResponse
			if err := json.Unmarshal([]byte(jsonStr), &hunyuanResp); err != nil {
				continue // skip invalid JSON
			}
			// Accumulate content and reasoning content.
			for _, choice := range hunyuanResp.Choices {
				if choice.Delta.Content != "" {
					contentBuilder.WriteString(choice.Delta.Content)
				}
				if choice.Delta.ReasoningContent != "" {
					reasoningBuilder.WriteString(choice.Delta.ReasoningContent)
				}
			}
		}
	}
	return contentBuilder.String(), reasoningBuilder.String(), nil
}

// createRoleChunk builds the initial chunk announcing the assistant role.
func createRoleChunk(id string, created int64, model string) []byte {
	chunk := StreamChunk{
		ID:      id,
		Object:  "chat.completion.chunk",
		Created: created,
		Model:   model,
		Choices: []struct {
			Index        int     `json:"index"`
			FinishReason *string `json:"finish_reason,omitempty"`
			Delta        struct {
				Role             string `json:"role,omitempty"`
				Content          string `json:"content,omitempty"`
				ReasoningContent string `json:"reasoning_content,omitempty"`
			} `json:"delta"`
		}{
			{
				Index: 0,
				Delta: struct {
					Role             string `json:"role,omitempty"`
					Content          string `json:"content,omitempty"`
					ReasoningContent string `json:"reasoning_content,omitempty"`
				}{
					Role: "assistant",
				},
			},
		},
	}
	data, _ := json.Marshal(chunk)
	return data
}

// createContentChunk builds a chunk carrying regular content.
func createContentChunk(id string, created int64, model string, content string) []byte {
	chunk := StreamChunk{
		ID:      id,
		Object:  "chat.completion.chunk",
		Created: created,
		Model:   model,
		Choices: []struct {
			Index        int     `json:"index"`
			FinishReason *string `json:"finish_reason,omitempty"`
			Delta        struct {
				Role             string `json:"role,omitempty"`
				Content          string `json:"content,omitempty"`
				ReasoningContent string `json:"reasoning_content,omitempty"`
			} `json:"delta"`
		}{
			{
				Index: 0,
				Delta: struct {
					Role             string `json:"role,omitempty"`
					Content          string `json:"content,omitempty"`
					ReasoningContent string `json:"reasoning_content,omitempty"`
				}{
					Content: content,
				},
			},
		},
	}
	data, _ := json.Marshal(chunk)
	return data
}

// createReasoningChunk builds a chunk carrying reasoning content.
func createReasoningChunk(id string, created int64, model string, reasoningContent string) []byte {
	chunk := StreamChunk{
		ID:      id,
		Object:  "chat.completion.chunk",
		Created: created,
		Model:   model,
		Choices: []struct {
			Index        int     `json:"index"`
			FinishReason *string `json:"finish_reason,omitempty"`
			Delta        struct {
				Role             string `json:"role,omitempty"`
				Content          string `json:"content,omitempty"`
				ReasoningContent string `json:"reasoning_content,omitempty"`
			} `json:"delta"`
		}{
			{
				Index: 0,
				Delta: struct {
					Role             string `json:"role,omitempty"`
					Content          string `json:"content,omitempty"`
					ReasoningContent string `json:"reasoning_content,omitempty"`
				}{
					ReasoningContent: reasoningContent,
				},
			},
		},
	}
	data, _ := json.Marshal(chunk)
	return data
}

// createDoneChunk builds the final chunk carrying the finish reason.
func createDoneChunk(id string, created int64, model string, reason string) []byte {
	finishReason := reason
	chunk := StreamChunk{
		ID:      id,
		Object:  "chat.completion.chunk",
		Created: created,
		Model:   model,
		Choices: []struct {
			Index        int     `json:"index"`
			FinishReason *string `json:"finish_reason,omitempty"`
			Delta        struct {
				Role             string `json:"role,omitempty"`
				Content          string `json:"content,omitempty"`
				ReasoningContent string `json:"reasoning_content,omitempty"`
			} `json:"delta"`
		}{
			{
				Index:        0,
				FinishReason: &finishReason,
				Delta: struct {
					Role             string `json:"role,omitempty"`
					Content          string `json:"content,omitempty"`
					ReasoningContent string `json:"reasoning_content,omitempty"`
				}{},
			},
		},
	}
	data, _ := json.Marshal(chunk)
	return data
}

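// For illustration, a done chunk with reason "stop" serializes to JSON of the
// form (the id and created values are per-response):
//
//	{"id":"chatcmpl-...","object":"chat.completion.chunk","created":...,
//	 "model":"...","choices":[{"index":0,"finish_reason":"stop","delta":{}}]}
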
// setCommonHeaders sets the browser-like headers expected by the upstream API
// (mirroring the Python version of this proxy).
func setCommonHeaders(req *http.Request) {
	req.Header.Set("accept", "*/*")
	req.Header.Set("accept-language", "zh-CN,zh;q=0.9,en;q=0.8,zh-TW;q=0.7")
	req.Header.Set("authorization", "Bearer "+appConfig.BearerToken)
	req.Header.Set("dnt", "1")
	req.Header.Set("origin", "https://llm.hunyuan.tencent.com")
	req.Header.Set("polaris", "stream-server-online-sbs-10697")
	req.Header.Set("priority", "u=1, i")
	req.Header.Set("referer", "https://llm.hunyuan.tencent.com/")
	req.Header.Set("sec-ch-ua", "\"Chromium\";v=\"134\", \"Not:A-Brand\";v=\"24\", \"Google Chrome\";v=\"134\"")
	req.Header.Set("sec-ch-ua-mobile", "?0")
	req.Header.Set("sec-ch-ua-platform", "\"Windows\"")
	req.Header.Set("sec-fetch-dest", "empty")
	req.Header.Set("sec-fetch-mode", "cors")
	req.Header.Set("sec-fetch-site", "same-origin")
	req.Header.Set("staffname", "staryxzhang")
	req.Header.Set("wsid", "10697")
	req.Header.Set("user-agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/134.0.0.0 Safari/537.36")
}

// generateRequestID generates a request ID from the current time.
func generateRequestID() string {
	return fmt.Sprintf("%x", time.Now().UnixNano())
}

// getRandomString returns a random lowercase-alphanumeric string. It uses
// math/rand, which is fine for IDs but not for anything security-sensitive.
func getRandomString(length int) string {
	const charset = "abcdefghijklmnopqrstuvwxyz0123456789"
	b := make([]byte, length)
	for i := range b {
		b[i] = charset[rand.Intn(len(charset))]
	}
	return string(b)
}

// formatMessages flattens the messages into a "role: content" string.
func formatMessages(messages []APIMessage) string {
	var result strings.Builder
	for _, msg := range messages {
		result.WriteString(msg.Role)
		result.WriteString(": ")
		result.WriteString(contentToString(msg.Content))
		result.WriteString("\n")
	}
	return result.String()
}

// min returns the smaller of two ints.
func min(a, b int) int {
	if a < b {
		return a
	}
	return b
}

// max returns the larger of two int64s.
func max(a, b int64) int64 {
	if a > b {
		return a
	}
	return b
}