// hunyuan2api.go: an OpenAI-compatible HTTP proxy for the Tencent Hunyuan chat API.
package main
import (
"bufio"
"bytes"
"context"
"crypto/tls"
"encoding/json"
"flag"
"fmt"
"io"
"log"
"math/rand"
"net/http"
"os"
"os/signal"
"strings"
"sync"
"sync/atomic"
"syscall"
"time"
)
// WorkerPool manages a fixed set of worker goroutines fed by a task queue.
type WorkerPool struct {
taskQueue chan *Task
workerCount int
shutdownChannel chan struct{}
wg sync.WaitGroup
}
// Task carries everything a worker needs to process one request.
type Task struct {
r *http.Request
w http.ResponseWriter
done chan struct{}
reqID string
isStream bool
hunyuanReq HunyuanRequest
}
// NewWorkerPool creates a worker pool and starts its workers.
func NewWorkerPool(workerCount int, queueSize int) *WorkerPool {
pool := &WorkerPool{
taskQueue: make(chan *Task, queueSize),
workerCount: workerCount,
shutdownChannel: make(chan struct{}),
}
pool.Start()
return pool
}
// Start launches the pool's worker goroutines.
func (pool *WorkerPool) Start() {
// Spawn the workers.
for i := 0; i < pool.workerCount; i++ {
pool.wg.Add(1)
go func(workerID int) {
defer pool.wg.Done()
logInfo("Worker %d 已启动", workerID)
for {
select {
case task, ok := <-pool.taskQueue:
if !ok {
// Queue closed; exit the worker.
logInfo("Worker %d: task queue closed, exiting", workerID)
return
}
logDebug("Worker %d processing task reqID:%s", workerID, task.reqID)
// Process the task.
if task.isStream {
err := handleStreamingRequest(task.w, task.r, task.hunyuanReq, task.reqID)
if err != nil {
logError("Worker %d 处理流式任务失败: %v", workerID, err)
}
} else {
err := handleNonStreamingRequest(task.w, task.r, task.hunyuanReq, task.reqID)
if err != nil {
logError("Worker %d 处理非流式任务失败: %v", workerID, err)
}
}
// Signal that the task is done.
close(task.done)
case <-pool.shutdownChannel:
// Shutdown signal received; exit the worker.
logInfo("Worker %d: shutdown signal received, exiting", workerID)
return
}
}
}(i)
}
}
// SubmitTask submits a task to the pool without blocking.
func (pool *WorkerPool) SubmitTask(task *Task) (bool, error) {
select {
case pool.taskQueue <- task:
// Task enqueued successfully.
return true, nil
default:
// Queue is full.
return false, fmt.Errorf("task queue is full")
}
}
// Shutdown stops the worker pool. Tasks still sitting in the queue when this
// is called are not guaranteed to be drained.
func (pool *WorkerPool) Shutdown() {
logInfo("Shutting down worker pool...")
// Signal all workers to stop.
close(pool.shutdownChannel)
// Wait for every worker to exit.
pool.wg.Wait()
// Close the task queue.
close(pool.taskQueue)
logInfo("Worker pool shut down")
}
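// The sketch below (illustrative only; it is not called anywhere in this
// file) shows the pool's intended lifecycle. Real tasks also carry the
// http.ResponseWriter/Request pair consumed by the handlers further down.
func exampleWorkerPoolLifecycle() {
pool := NewWorkerPool(4, 16)
// Tasks built from live HTTP requests are submitted with
// pool.SubmitTask(task) and awaited via <-task.done; see
// handleChatCompletionRequestWithPool below.
pool.Shutdown()
}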
// Semaphore is a channel-based semaphore that caps concurrency.
type Semaphore struct {
sem chan struct{}
}
// NewSemaphore creates a semaphore with the given capacity.
func NewSemaphore(size int) *Semaphore {
return &Semaphore{
sem: make(chan struct{}, size),
}
}
// Acquire takes a slot, blocking until one is available.
func (s *Semaphore) Acquire() {
s.sem <- struct{}{}
}
// Release returns a slot.
func (s *Semaphore) Release() {
<-s.sem
}
// TryAcquire attempts to take a slot without blocking.
func (s *Semaphore) TryAcquire() bool {
select {
case s.sem <- struct{}{}:
return true
default:
return false
}
}
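// The sketch below (illustrative only; it is not called anywhere in this
// file) shows the intended TryAcquire/Release pairing around a bounded
// section, the same pattern the /v1/chat/completions handler uses.
func exampleSemaphoreUsage(s *Semaphore) bool {
if !s.TryAcquire() {
return false // over the concurrency limit; the caller should back off
}
defer s.Release()
// ... bounded work runs here ...
return true
}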
// Config holds the command-line configuration.
type Config struct {
Port string // Proxy server listen port
Address string // Proxy server listen address
LogLevel string // Log level
DevMode bool // Development mode flag
MaxRetries int // Maximum retries for failed requests
Timeout int // Request timeout in seconds
VerifySSL bool // Whether to verify SSL certificates
ModelName string // Default model name
BearerToken string // Bearer token (a public token is provided by default)
WorkerCount int // Number of workers in the pool
QueueSize int // Task queue size
MaxConcurrent int // Maximum concurrent requests
}
// SupportedModels lists the models this proxy accepts.
var SupportedModels = []string{
"hunyuan-t1-latest",
"hunyuan-turbos-latest",
}
// Tencent Hunyuan API target URL and proxy version.
const (
TargetURL = "https://llm.hunyuan.tencent.com/aide/api/v2/triton_image/demo_text_chat/"
Version = "1.0.0" // version number
)
// Log levels
const (
LogLevelDebug = "debug"
LogLevelInfo = "info"
LogLevelWarn = "warn"
LogLevelError = "error"
)
// parseFlags parses the command-line flags and returns a Config.
func parseFlags() *Config {
cfg := &Config{}
flag.StringVar(&cfg.Port, "port", "6666", "Port to listen on")
flag.StringVar(&cfg.Address, "address", "localhost", "Address to listen on")
flag.StringVar(&cfg.LogLevel, "log-level", LogLevelInfo, "Log level (debug, info, warn, error)")
flag.BoolVar(&cfg.DevMode, "dev", false, "Enable development mode with enhanced logging")
flag.IntVar(&cfg.MaxRetries, "max-retries", 3, "Maximum number of retries for failed requests")
flag.IntVar(&cfg.Timeout, "timeout", 300, "Request timeout in seconds")
flag.BoolVar(&cfg.VerifySSL, "verify-ssl", true, "Verify SSL certificates")
flag.StringVar(&cfg.ModelName, "model", "hunyuan-t1-latest", "Default Hunyuan model name")
flag.StringVar(&cfg.BearerToken, "token", "7auGXNATFSKl7dF", "Bearer token for Hunyuan API")
flag.IntVar(&cfg.WorkerCount, "workers", 50, "Number of worker goroutines in the pool")
flag.IntVar(&cfg.QueueSize, "queue-size", 500, "Size of the task queue")
flag.IntVar(&cfg.MaxConcurrent, "max-concurrent", 100, "Maximum number of concurrent requests")
flag.Parse()
// Development mode forces the debug log level.
if cfg.DevMode && cfg.LogLevel != LogLevelDebug {
cfg.LogLevel = LogLevelDebug
fmt.Println("Development mode enabled; log level set to debug")
}
return cfg
}
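// Example invocation (assuming the binary is built as "hunyuan2api"):
//
//	./hunyuan2api -port 6666 -address 0.0.0.0 -log-level debug -workers 50 -queue-size 500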
// Global configuration
var (
appConfig *Config
)
// Performance metrics
var (
requestCounter int64
successCounter int64
errorCounter int64
avgResponseTime int64 // cumulative response time in ms; divide by the request count for the average
latencyHistogram [10]int64 // 0-100ms, 100-200ms, ... >1s
queuedRequests int64 // requests currently in the queue
rejectedRequests int64 // rejected requests
)
// Concurrency-control components
var (
workerPool *WorkerPool // worker pool
requestSem *Semaphore // request semaphore
)
// Logger state
var (
logger *log.Logger
logLevel string
logMutex sync.Mutex
)
// initLogger initializes the logger.
func initLogger(level string) {
logger = log.New(os.Stdout, "[HunyuanAPI] ", log.LstdFlags)
logLevel = level
}
// Logging helpers gated by log level.
func logDebug(format string, v ...interface{}) {
if logLevel == LogLevelDebug {
logMutex.Lock()
logger.Printf("[DEBUG] "+format, v...)
logMutex.Unlock()
}
}
func logInfo(format string, v ...interface{}) {
if logLevel == LogLevelDebug || logLevel == LogLevelInfo {
logMutex.Lock()
logger.Printf("[INFO] "+format, v...)
logMutex.Unlock()
}
}
func logWarn(format string, v ...interface{}) {
if logLevel == LogLevelDebug || logLevel == LogLevelInfo || logLevel == LogLevelWarn {
logMutex.Lock()
logger.Printf("[WARN] "+format, v...)
logMutex.Unlock()
}
}
func logError(format string, v ...interface{}) {
logMutex.Lock()
logger.Printf("[ERROR] "+format, v...)
logMutex.Unlock()
// Count the error.
atomic.AddInt64(&errorCounter, 1)
}
// APIMessage is the OpenAI/DeepSeek message format.
type APIMessage struct {
Role string `json:"role"`
Content interface{} `json:"content"` // interface{} so any content shape is accepted
}
// APIRequest is the OpenAI/DeepSeek request format.
type APIRequest struct {
Model string `json:"model"`
Messages []APIMessage `json:"messages"`
Stream bool `json:"stream"`
Temperature float64 `json:"temperature,omitempty"`
MaxTokens int `json:"max_tokens,omitempty"`
}
// HunyuanRequest is the Tencent Hunyuan request format.
type HunyuanRequest struct {
Stream bool `json:"stream"`
Model string `json:"model"`
QueryID string `json:"query_id"`
Messages []APIMessage `json:"messages"`
StreamModeration bool `json:"stream_moderation"`
EnableEnhancement bool `json:"enable_enhancement"`
}
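// A marshalled HunyuanRequest looks like this on the wire (values
// illustrative):
//
//	{"stream":true,"model":"hunyuan-t1-latest","query_id":"abc12345...",
//	 "messages":[{"role":"user","content":"hello"}],
//	 "stream_moderation":true,"enable_enhancement":false}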
// HunyuanResponse is the Tencent Hunyuan response format.
type HunyuanResponse struct {
ID string `json:"id"`
Object string `json:"object"`
Created int64 `json:"created"`
Model string `json:"model"`
SystemFingerprint string `json:"system_fingerprint"`
Choices []Choice `json:"choices"`
Note string `json:"note,omitempty"`
}
// Choice is a single response choice.
type Choice struct {
Index int `json:"index"`
Delta Delta `json:"delta"`
FinishReason *string `json:"finish_reason"`
}
// Delta carries incremental content and reasoning content.
type Delta struct {
Role string `json:"role,omitempty"`
Content string `json:"content,omitempty"`
ReasoningContent string `json:"reasoning_content,omitempty"`
}
// StreamChunk is the DeepSeek-style streaming response format.
type StreamChunk struct {
ID string `json:"id"`
Object string `json:"object"`
Created int64 `json:"created"`
Model string `json:"model"`
Choices []struct {
Index int `json:"index"`
FinishReason *string `json:"finish_reason,omitempty"`
Delta struct {
Role string `json:"role,omitempty"`
Content string `json:"content,omitempty"`
ReasoningContent string `json:"reasoning_content,omitempty"`
} `json:"delta"`
} `json:"choices"`
}
// CompletionResponse is the non-streaming response format.
type CompletionResponse struct {
ID string `json:"id"`
Object string `json:"object"`
Created int64 `json:"created"`
Model string `json:"model"`
Choices []struct {
Index int `json:"index"`
FinishReason string `json:"finish_reason"`
Message struct {
Role string `json:"role"`
Content string `json:"content"`
ReasoningContent string `json:"reasoning_content,omitempty"`
} `json:"message"`
} `json:"choices"`
Usage struct {
PromptTokens int `json:"prompt_tokens"`
CompletionTokens int `json:"completion_tokens"`
TotalTokens int `json:"total_tokens"`
} `json:"usage"`
}
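// A populated CompletionResponse marshals to, e.g. (values illustrative):
//
//	{"id":"chatcmpl-abc","object":"chat.completion","created":1700000000,
//	 "model":"hunyuan-t1-latest",
//	 "choices":[{"index":0,"finish_reason":"stop",
//	  "message":{"role":"assistant","content":"Hello"}}],
//	 "usage":{"prompt_tokens":3,"completion_tokens":1,"total_tokens":4}}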
// Request counter and mutex, used for monitoring.
var (
requestCount uint64 = 0
countMutex sync.Mutex
)
// Entry point.
func main() {
// Parse the configuration.
appConfig = parseFlags()
// Initialize logging.
initLogger(appConfig.LogLevel)
logInfo("Starting service: TargetURL=%s, Address=%s, Port=%s, Version=%s, LogLevel=%s, SupportedModels=%v, BearerToken=***, WorkerCount=%d, QueueSize=%d, MaxConcurrent=%d",
TargetURL, appConfig.Address, appConfig.Port, Version, appConfig.LogLevel, SupportedModels,
appConfig.WorkerCount, appConfig.QueueSize, appConfig.MaxConcurrent)
// Create the worker pool and the request semaphore.
workerPool = NewWorkerPool(appConfig.WorkerCount, appConfig.QueueSize)
requestSem = NewSemaphore(appConfig.MaxConcurrent)
logInfo("Worker pool created: %d workers, queue size %d", appConfig.WorkerCount, appConfig.QueueSize)
// Tune the default transport for higher concurrency.
http.DefaultTransport.(*http.Transport).MaxIdleConnsPerHost = 100
http.DefaultTransport.(*http.Transport).MaxIdleConns = 100
http.DefaultTransport.(*http.Transport).IdleConnTimeout = 90 * time.Second
// Custom server with settings that support higher concurrency.
server := &http.Server{
Addr: appConfig.Address + ":" + appConfig.Port,
ReadTimeout: time.Duration(appConfig.Timeout) * time.Second,
WriteTimeout: time.Duration(appConfig.Timeout) * time.Second,
IdleTimeout: 120 * time.Second,
Handler: nil, // use the default ServeMux
}
// Register handlers.
http.HandleFunc("/v1/models", func(w http.ResponseWriter, r *http.Request) {
setCORSHeaders(w)
if r.Method == "OPTIONS" {
w.WriteHeader(http.StatusOK)
return
}
handleModelsRequest(w, r)
})
http.HandleFunc("/v1/chat/completions", func(w http.ResponseWriter, r *http.Request) {
setCORSHeaders(w)
if r.Method == "OPTIONS" {
w.WriteHeader(http.StatusOK)
return
}
// Bump the request counter.
countMutex.Lock()
requestCount++
currentCount := requestCount
countMutex.Unlock()
logInfo("Received request #%d", currentCount)
// Count the request.
atomic.AddInt64(&requestCounter, 1)
// Try to acquire the semaphore.
if !requestSem.TryAcquire() {
// Concurrency limit reached.
atomic.AddInt64(&rejectedRequests, 1)
logWarn("Request #%d rejected: concurrent request limit reached", currentCount)
w.Header().Set("Retry-After", "30")
http.Error(w, "Server is busy, please try again later", http.StatusServiceUnavailable)
return
}
// Release the semaphore when the handler returns.
defer requestSem.Release()
// Process the request.
handleChatCompletionRequestWithPool(w, r, currentCount)
})
// Health-check endpoint.
http.HandleFunc("/health", func(w http.ResponseWriter, r *http.Request) {
setCORSHeaders(w)
if r.Method == "OPTIONS" {
w.WriteHeader(http.StatusOK)
return
}
// Load the counters.
reqCount := atomic.LoadInt64(&requestCounter)
succCount := atomic.LoadInt64(&successCounter)
errCount := atomic.LoadInt64(&errorCounter)
queuedCount := atomic.LoadInt64(&queuedRequests)
rejectedCount := atomic.LoadInt64(&rejectedRequests)
// Compute the average response time.
var avgTime int64 = 0
if reqCount > 0 {
avgTime = atomic.LoadInt64(&avgResponseTime) / max(reqCount, 1)
}
// Build the latency-histogram data.
histogram := make([]int64, 10)
for i := 0; i < 10; i++ {
histogram[i] = atomic.LoadInt64(&latencyHistogram[i])
}
// Build the response.
stats := map[string]interface{}{
"status": "ok",
"version": Version,
"requests": reqCount,
"success": succCount,
"errors": errCount,
"queued": queuedCount,
"rejected": rejectedCount,
"avg_time_ms": avgTime,
"histogram_ms": histogram,
"worker_count": workerPool.workerCount,
"queue_size": len(workerPool.taskQueue),
"queue_capacity": cap(workerPool.taskQueue),
"queue_percent": float64(len(workerPool.taskQueue)) / float64(cap(workerPool.taskQueue)) * 100,
"concurrent_limit": appConfig.MaxConcurrent,
}
w.Header().Set("Content-Type", "application/json")
w.WriteHeader(http.StatusOK)
json.NewEncoder(w).Encode(stats)
})
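// Example health probe (sketch; field values illustrative):
//
//	curl -s http://localhost:6666/health
//	=> {"status":"ok","version":"1.0.0","requests":42,"success":40,"errors":2,...}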
// Channel for stop signals.
stop := make(chan os.Signal, 1)
signal.Notify(stop, os.Interrupt, syscall.SIGTERM)
// Start the server in a goroutine.
go func() {
logInfo("Starting proxy server on %s", server.Addr)
if err := server.ListenAndServe(); err != nil && err != http.ErrServerClosed {
logError("Failed to start server: %v", err)
os.Exit(1)
}
}()
// Wait for a stop signal.
<-stop
// Context for graceful shutdown.
ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
defer cancel()
// Gracefully shut down the server.
logInfo("Server is shutting down...")
if err := server.Shutdown(ctx); err != nil {
logError("Server shutdown failed: %v", err)
}
// Shut down the worker pool.
workerPool.Shutdown()
logInfo("Server gracefully stopped")
}
// setCORSHeaders sets permissive CORS headers.
func setCORSHeaders(w http.ResponseWriter) {
w.Header().Set("Access-Control-Allow-Origin", "*")
w.Header().Set("Access-Control-Allow-Methods", "POST, GET, OPTIONS")
w.Header().Set("Access-Control-Allow-Headers", "Content-Type, Authorization")
}
// validateMessages checks the message list for basic validity.
func validateMessages(messages []APIMessage) (bool, string) {
reqID := generateRequestID()
logDebug("[reqID:%s] validating message format", reqID)
if len(messages) == 0 {
return false, "Messages array is required"
}
for _, msg := range messages {
if msg.Role == "" || msg.Content == nil {
return false, "Invalid message format: each message must have role and content"
}
}
return true, ""
}
// extractToken extracts the bearer token from the request headers.
func extractToken(r *http.Request) (string, error) {
// Read the Authorization header.
authHeader := r.Header.Get("Authorization")
if authHeader == "" {
return "", fmt.Errorf("missing Authorization header")
}
// Check the format before extracting the token.
if !strings.HasPrefix(authHeader, "Bearer ") {
return "", fmt.Errorf("invalid Authorization header format, must start with 'Bearer '")
}
// Extract the token value.
token := strings.TrimPrefix(authHeader, "Bearer ")
if token == "" {
return "", fmt.Errorf("empty token in Authorization header")
}
return token, nil
}
// contentToString converts content of any type to a string.
func contentToString(content interface{}) string {
if content == nil {
return ""
}
switch v := content.(type) {
case string:
return v
default:
jsonBytes, err := json.Marshal(v)
if err != nil {
logWarn("将内容转换为JSON失败: %v", err)
return ""
}
return string(jsonBytes)
}
}
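// For example, a string passes through unchanged, while a structured value
// such as
//
//	[]interface{}{map[string]interface{}{"type": "text", "text": "hi"}}
//
// is flattened to the JSON string `[{"text":"hi","type":"text"}]`.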
// generateQueryID generates a query ID for upstream requests.
func generateQueryID() string {
return fmt.Sprintf("%s%d", getRandomString(8), time.Now().UnixNano())
}
// isModelSupported reports whether the model is in the supported list.
func isModelSupported(modelName string) bool {
for _, supportedModel := range SupportedModels {
if modelName == supportedModel {
return true
}
}
return false
}
// handleModelsRequest serves the model list.
func handleModelsRequest(w http.ResponseWriter, r *http.Request) {
logInfo("Handling model list request")
// Return the model list.
w.Header().Set("Content-Type", "application/json")
w.WriteHeader(http.StatusOK)
// Build the model data.
modelData := make([]map[string]interface{}, 0, len(SupportedModels))
for _, model := range SupportedModels {
modelData = append(modelData, map[string]interface{}{
"id": model,
"object": "model",
"created": time.Now().Unix(),
"owned_by": "TencentCloud",
"capabilities": map[string]interface{}{
"chat": true,
"completions": true,
"reasoning": true,
},
})
}
modelsList := map[string]interface{}{
"object": "list",
"data": modelData,
}
json.NewEncoder(w).Encode(modelsList)
}
// handleChatCompletionRequestWithPool handles a chat-completion request via the worker pool.
func handleChatCompletionRequestWithPool(w http.ResponseWriter, r *http.Request, requestNum uint64) {
reqID := generateRequestID()
startTime := time.Now()
logInfo("[reqID:%s] handling chat completion request #%d", reqID, requestNum)
// Attach a timeout context.
ctx, cancel := context.WithTimeout(r.Context(), time.Duration(appConfig.Timeout)*time.Second)
defer cancel()
// Request carrying the timeout context.
r = r.WithContext(ctx)
// Recover from panics so a single request cannot crash the server.
defer func() {
if rec := recover(); rec != nil {
logError("[reqID:%s] panic while handling request: %v", reqID, rec)
http.Error(w, "Internal server error", http.StatusInternalServerError)
}
}()
// Parse the request body.
var apiReq APIRequest
if err := json.NewDecoder(r.Body).Decode(&apiReq); err != nil {
logError("[reqID:%s] failed to parse request: %v", reqID, err)
http.Error(w, "Invalid request body", http.StatusBadRequest)
return
}
// Validate the message format.
valid, errMsg := validateMessages(apiReq.Messages)
if !valid {
logError("[reqID:%s] message validation failed: %s", reqID, errMsg)
http.Error(w, errMsg, http.StatusBadRequest)
return
}
// Whether to stream the response.
isStream := apiReq.Stream
// Pick the model to use.
modelName := appConfig.ModelName
if apiReq.Model != "" {
// Fall back to the default if the requested model is unsupported.
if isModelSupported(apiReq.Model) {
modelName = apiReq.Model
} else {
logWarn("[reqID:%s] requested model %s is not supported, using default model %s", reqID, apiReq.Model, modelName)
}
}
logInfo("[reqID:%s] using model: %s", reqID, modelName)
// Build the Hunyuan API request.
hunyuanReq := HunyuanRequest{
Stream: true, // the Hunyuan API always streams
Model: modelName,
QueryID: generateQueryID(),
Messages: apiReq.Messages,
StreamModeration: true,
EnableEnhancement: false,
}
// Build the task.
task := &Task{
r: r,
w: w,
done: make(chan struct{}),
reqID: reqID,
isStream: isStream,
hunyuanReq: hunyuanReq,
}
// Enqueue the task.
atomic.AddInt64(&queuedRequests, 1)
submitted, err := workerPool.SubmitTask(task)
if !submitted {
atomic.AddInt64(&queuedRequests, -1)
atomic.AddInt64(&rejectedRequests, 1)
logError("[reqID:%s] failed to submit task: %v", reqID, err)
w.Header().Set("Retry-After", "60")
http.Error(w, "Server queue is full, please try again later", http.StatusServiceUnavailable)
return
}
logInfo("[reqID:%s] task submitted to queue", reqID)
// Wait for the task to finish or for the request to time out.
select {
case <-task.done:
// Task finished.
logInfo("[reqID:%s] task completed", reqID)
case <-r.Context().Done():
// Request canceled or timed out.
logWarn("[reqID:%s] request canceled or timed out", reqID)
// Note: even though the request was canceled, the worker may still be
// processing the task.
}
// Request finished; update the metrics.
atomic.AddInt64(&queuedRequests, -1)
elapsed := time.Since(startTime).Milliseconds()
// Update the latency histogram.
bucketIndex := min(int(elapsed/100), 9)
atomic.AddInt64(&latencyHistogram[bucketIndex], 1)
// Accumulate the total response time.
atomic.AddInt64(&avgResponseTime, elapsed)
if r.Context().Err() == nil {
// Count the success.
atomic.AddInt64(&successCounter, 1)
logInfo("[reqID:%s] request handled successfully in %dms", reqID, elapsed)
} else {
logError("[reqID:%s] request failed: %v, took %dms", reqID, r.Context().Err(), elapsed)
}
}
// handleChatCompletionRequest is the original implementation without the worker pool (no longer used).
func handleChatCompletionRequest(w http.ResponseWriter, r *http.Request) {
reqID := generateRequestID()
startTime := time.Now()
logInfo("[reqID:%s] handling chat completion request", reqID)
// Parse the request body.
var apiReq APIRequest
if err := json.NewDecoder(r.Body).Decode(&apiReq); err != nil {
logError("[reqID:%s] failed to parse request: %v", reqID, err)
http.Error(w, "Invalid request body", http.StatusBadRequest)
return
}
// Validate the message format.
valid, errMsg := validateMessages(apiReq.Messages)
if !valid {
logError("[reqID:%s] message validation failed: %s", reqID, errMsg)
http.Error(w, errMsg, http.StatusBadRequest)
return
}
// Whether to stream the response.
isStream := apiReq.Stream
// Pick the model to use.
modelName := appConfig.ModelName
if apiReq.Model != "" {
// Fall back to the default if the requested model is unsupported.
if isModelSupported(apiReq.Model) {
modelName = apiReq.Model
} else {
logWarn("[reqID:%s] requested model %s is not supported, using default model %s", reqID, apiReq.Model, modelName)
}
}
logInfo("[reqID:%s] using model: %s", reqID, modelName)
// Build the Hunyuan API request.
hunyuanReq := HunyuanRequest{
Stream: true, // the Hunyuan API always streams
Model: modelName,
QueryID: generateQueryID(),
Messages: apiReq.Messages,
StreamModeration: true,
EnableEnhancement: false,
}
// Forward the request to the Hunyuan API.
var responseErr error
if isStream {
responseErr = handleStreamingRequest(w, r, hunyuanReq, reqID)
} else {
responseErr = handleNonStreamingRequest(w, r, hunyuanReq, reqID)
}
// Request finished; update the metrics.
elapsed := time.Since(startTime).Milliseconds()
// Update the latency histogram.
bucketIndex := min(int(elapsed/100), 9)
atomic.AddInt64(&latencyHistogram[bucketIndex], 1)
// Accumulate the total response time.
atomic.AddInt64(&avgResponseTime, elapsed)
if responseErr == nil {
// Count the success.
atomic.AddInt64(&successCounter, 1)
logInfo("[reqID:%s] request handled successfully in %dms", reqID, elapsed)
} else {
logError("[reqID:%s] request failed: %v, took %dms", reqID, responseErr, elapsed)
}
}
// getHTTPClient returns an HTTP client, optionally with SSL verification disabled.
func getHTTPClient() *http.Client {
tr := &http.Transport{
MaxIdleConnsPerHost: 100,
IdleConnTimeout: 90 * time.Second,
TLSClientConfig: nil, // default configuration
}
// Disable SSL verification if configured to.
if !appConfig.VerifySSL {
tr.TLSClientConfig = &tls.Config{InsecureSkipVerify: true}
}
return &http.Client{
Timeout: time.Duration(appConfig.Timeout) * time.Second,
Transport: tr,
}
}
// handleStreamingRequest proxies a streaming request.
func handleStreamingRequest(w http.ResponseWriter, r *http.Request, hunyuanReq HunyuanRequest, reqID string) error {
logInfo("[reqID:%s] handling streaming request", reqID)
// Serialize the request.
jsonData, err := json.Marshal(hunyuanReq)
if err != nil {
logError("[reqID:%s] failed to serialize request: %v", reqID, err)
http.Error(w, "Internal server error", http.StatusInternalServerError)
return err
}
// Build the upstream request.
httpReq, err := http.NewRequestWithContext(r.Context(), "POST", TargetURL, bytes.NewBuffer(jsonData))
if err != nil {
logError("[reqID:%s] failed to create request: %v", reqID, err)
http.Error(w, "Internal server error", http.StatusInternalServerError)
return err
}
// Set the request headers.
httpReq.Header.Set("Content-Type", "application/json")
httpReq.Header.Set("Model", hunyuanReq.Model)
setCommonHeaders(httpReq)
// Build the HTTP client.
client := getHTTPClient()
// Send the request.
resp, err := client.Do(httpReq)
if err != nil {
logError("[reqID:%s] failed to send request: %v", reqID, err)
http.Error(w, "Failed to connect to API", http.StatusBadGateway)
return err
}
defer resp.Body.Close()
// Check the response status.
if resp.StatusCode != http.StatusOK {
logError("[reqID:%s] API returned non-200 status code: %d", reqID, resp.StatusCode)
bodyBytes, _ := io.ReadAll(resp.Body)
logError("[reqID:%s] error response body: %s", reqID, string(bodyBytes))
http.Error(w, fmt.Sprintf("API error with status code: %d", resp.StatusCode), resp.StatusCode)
return fmt.Errorf("API returned non-200 status code: %d", resp.StatusCode)
}
// Set the response headers.
w.Header().Set("Content-Type", "text/event-stream")
w.Header().Set("Cache-Control", "no-cache")
w.Header().Set("Connection", "keep-alive")
// Create the response ID and timestamp.
respID := fmt.Sprintf("chatcmpl-%s", getRandomString(10))
createdTime := time.Now().Unix()
// Create the reader.
reader := bufio.NewReaderSize(resp.Body, 16384)
// Get a Flusher.
flusher, ok := w.(http.Flusher)
if !ok {
logError("[reqID:%s] Streaming not supported", reqID)
http.Error(w, "Streaming not supported", http.StatusInternalServerError)
return fmt.Errorf("streaming not supported")
}
// Send the role chunk.
roleChunk := createRoleChunk(respID, createdTime, hunyuanReq.Model)
w.Write([]byte("data: " + string(roleChunk) + "\n\n"))
flusher.Flush()
// Track whether the upstream terminated the stream itself, so we do not
// emit a duplicate finish chunk and [DONE] marker afterwards.
sentDone := false
// Keep reading the response.
for {
// Check for timeout or client cancellation.
select {
case <-r.Context().Done():
logWarn("[reqID:%s] request timed out or was canceled by the client", reqID)
return fmt.Errorf("request timed out or canceled")
default:
// Keep processing.
}
// Read one line.
line, err := reader.ReadBytes('\n')
if err != nil {
if err != io.EOF {
logError("[reqID:%s] error reading response: %v", reqID, err)
return err
}
break
}
// Process the data line.
lineStr := string(line)
if strings.HasPrefix(lineStr, "data: ") {
jsonStr := strings.TrimPrefix(lineStr, "data: ")
jsonStr = strings.TrimSpace(jsonStr)
// Special-case the [DONE] message.
if jsonStr == "[DONE]" {
logDebug("[reqID:%s] received [DONE] message", reqID)
w.Write([]byte("data: [DONE]\n\n"))
flusher.Flush()
sentDone = true
break
}
// Parse the Hunyuan response.
var hunyuanResp HunyuanResponse
if err := json.Unmarshal([]byte(jsonStr), &hunyuanResp); err != nil {
logWarn("[reqID:%s] failed to parse JSON: %v, data: %s", reqID, err, jsonStr)
continue
}
// Forward each kind of content.
for _, choice := range hunyuanResp.Choices {
if choice.Delta.Content != "" {
// Send a content chunk.
contentChunk := createContentChunk(respID, createdTime, hunyuanReq.Model, choice.Delta.Content)
w.Write([]byte("data: " + string(contentChunk) + "\n\n"))
flusher.Flush()
}
if choice.Delta.ReasoningContent != "" {
// Send a reasoning-content chunk.
reasoningChunk := createReasoningChunk(respID, createdTime, hunyuanReq.Model, choice.Delta.ReasoningContent)
w.Write([]byte("data: " + string(reasoningChunk) + "\n\n"))
flusher.Flush()
}
// Handle the finish flag.
if choice.FinishReason != nil {
finishReason := *choice.FinishReason
if finishReason != "" {
doneChunk := createDoneChunk(respID, createdTime, hunyuanReq.Model, finishReason)
w.Write([]byte("data: " + string(doneChunk) + "\n\n"))
flusher.Flush()
}
}
}
}
}
// If the stream ended without an upstream [DONE], send the finish signals ourselves.
if !sentDone {
finishReason := "stop"
doneChunk := createDoneChunk(respID, createdTime, hunyuanReq.Model, finishReason)
w.Write([]byte("data: " + string(doneChunk) + "\n\n"))
w.Write([]byte("data: [DONE]\n\n"))
flusher.Flush()
}
return nil
}
// handleNonStreamingRequest proxies a non-streaming request by aggregating the upstream stream.
func handleNonStreamingRequest(w http.ResponseWriter, r *http.Request, hunyuanReq HunyuanRequest, reqID string) error {
logInfo("[reqID:%s] handling non-streaming request", reqID)
// Serialize the request.
jsonData, err := json.Marshal(hunyuanReq)
if err != nil {
logError("[reqID:%s] failed to serialize request: %v", reqID, err)
http.Error(w, "Internal server error", http.StatusInternalServerError)
return err
}
// Build the upstream request.
httpReq, err := http.NewRequestWithContext(r.Context(), "POST", TargetURL, bytes.NewBuffer(jsonData))
if err != nil {
logError("[reqID:%s] failed to create request: %v", reqID, err)
http.Error(w, "Internal server error", http.StatusInternalServerError)
return err
}
// Set the request headers.
httpReq.Header.Set("Content-Type", "application/json")
httpReq.Header.Set("Model", hunyuanReq.Model)
setCommonHeaders(httpReq)
// Build the HTTP client.
client := getHTTPClient()
// Send the request.
resp, err := client.Do(httpReq)
if err != nil {
logError("[reqID:%s] failed to send request: %v", reqID, err)
http.Error(w, "Failed to connect to API", http.StatusBadGateway)
return err
}
defer resp.Body.Close()
// Check the response status.
if resp.StatusCode != http.StatusOK {
logError("[reqID:%s] API returned non-200 status code: %d", reqID, resp.StatusCode)
bodyBytes, _ := io.ReadAll(resp.Body)
logError("[reqID:%s] error response body: %s", reqID, string(bodyBytes))
http.Error(w, fmt.Sprintf("API error with status code: %d", resp.StatusCode), resp.StatusCode)
return fmt.Errorf("API returned non-200 status code: %d", resp.StatusCode)
}
// Read the complete streamed response.
bodyBytes, err := io.ReadAll(resp.Body)
if err != nil {
logError("[reqID:%s] failed to read response: %v", reqID, err)
http.Error(w, "Failed to read API response", http.StatusInternalServerError)
return err
}
// Parse the stream and extract the full content.
fullContent, reasoningContent, err := extractFullContentFromStream(bodyBytes, reqID)
if err != nil {
logError("[reqID:%s] failed to parse streaming response: %v", reqID, err)
http.Error(w, "Failed to parse streaming response", http.StatusInternalServerError)
return err
}
// Build the complete non-streaming response.
completionResponse := CompletionResponse{
ID: fmt.Sprintf("chatcmpl-%s", getRandomString(10)),
Object: "chat.completion",
Created: time.Now().Unix(),
Model: hunyuanReq.Model,
Choices: []struct {
Index int `json:"index"`
FinishReason string `json:"finish_reason"`
Message struct {
Role string `json:"role"`
Content string `json:"content"`
ReasoningContent string `json:"reasoning_content,omitempty"`
} `json:"message"`
}{
{
Index: 0,
FinishReason: "stop",
Message: struct {
Role string `json:"role"`
Content string `json:"content"`
ReasoningContent string `json:"reasoning_content,omitempty"`
}{
Role: "assistant",
Content: fullContent,
ReasoningContent: reasoningContent,
},
},
},
Usage: struct {
PromptTokens int `json:"prompt_tokens"`
CompletionTokens int `json:"completion_tokens"`
TotalTokens int `json:"total_tokens"`
}{
// Rough estimates at roughly four characters per token.
PromptTokens: len(formatMessages(hunyuanReq.Messages)) / 4,
CompletionTokens: len(fullContent) / 4,
TotalTokens: (len(formatMessages(hunyuanReq.Messages)) + len(fullContent)) / 4,
},
}
// Write the response.
w.Header().Set("Content-Type", "application/json")
if err := json.NewEncoder(w).Encode(completionResponse); err != nil {
logError("[reqID:%s] failed to encode response: %v", reqID, err)
http.Error(w, "Failed to encode response", http.StatusInternalServerError)
return err
}
return nil
}
// extractFullContentFromStream aggregates the full content from a streamed response body.
func extractFullContentFromStream(bodyBytes []byte, reqID string) (string, string, error) {
bodyStr := string(bodyBytes)
lines := strings.Split(bodyStr, "\n")
// Content accumulators.
var contentBuilder strings.Builder
var reasoningBuilder strings.Builder
// Parse each line.
for _, line := range lines {
if strings.HasPrefix(line, "data: ") && !strings.Contains(line, "[DONE]") {
jsonStr := strings.TrimPrefix(line, "data: ")
jsonStr = strings.TrimSpace(jsonStr)
// Parse the JSON.
var hunyuanResp HunyuanResponse
if err := json.Unmarshal([]byte(jsonStr), &hunyuanResp); err != nil {
continue // skip invalid JSON
}
// Collect content and reasoning content.
for _, choice := range hunyuanResp.Choices {
if choice.Delta.Content != "" {
contentBuilder.WriteString(choice.Delta.Content)
}
if choice.Delta.ReasoningContent != "" {
reasoningBuilder.WriteString(choice.Delta.ReasoningContent)
}
}
}
}
return contentBuilder.String(), reasoningBuilder.String(), nil
}
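// For example, an upstream body of
//
//	data: {"choices":[{"delta":{"content":"Hel"},"finish_reason":null}]}
//	data: {"choices":[{"delta":{"content":"lo"},"finish_reason":null}]}
//	data: [DONE]
//
// aggregates to fullContent "Hello" and an empty reasoningContent.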
// createRoleChunk builds the initial role chunk.
func createRoleChunk(id string, created int64, model string) []byte {
chunk := StreamChunk{
ID: id,
Object: "chat.completion.chunk",
Created: created,
Model: model,
Choices: []struct {
Index int `json:"index"`
FinishReason *string `json:"finish_reason,omitempty"`
Delta struct {
Role string `json:"role,omitempty"`
Content string `json:"content,omitempty"`
ReasoningContent string `json:"reasoning_content,omitempty"`
} `json:"delta"`
}{
{
Index: 0,
Delta: struct {
Role string `json:"role,omitempty"`
Content string `json:"content,omitempty"`
ReasoningContent string `json:"reasoning_content,omitempty"`
}{
Role: "assistant",
},
},
},
}
data, _ := json.Marshal(chunk)
return data
}
// createContentChunk builds a content chunk.
func createContentChunk(id string, created int64, model string, content string) []byte {
chunk := StreamChunk{
ID: id,
Object: "chat.completion.chunk",
Created: created,
Model: model,
Choices: []struct {
Index int `json:"index"`
FinishReason *string `json:"finish_reason,omitempty"`
Delta struct {
Role string `json:"role,omitempty"`
Content string `json:"content,omitempty"`
ReasoningContent string `json:"reasoning_content,omitempty"`
} `json:"delta"`
}{
{
Index: 0,
Delta: struct {
Role string `json:"role,omitempty"`
Content string `json:"content,omitempty"`
ReasoningContent string `json:"reasoning_content,omitempty"`
}{
Content: content,
},
},
},
}
data, _ := json.Marshal(chunk)
return data
}
// createReasoningChunk builds a reasoning-content chunk.
func createReasoningChunk(id string, created int64, model string, reasoningContent string) []byte {
chunk := StreamChunk{
ID: id,
Object: "chat.completion.chunk",
Created: created,
Model: model,
Choices: []struct {
Index int `json:"index"`
FinishReason *string `json:"finish_reason,omitempty"`
Delta struct {
Role string `json:"role,omitempty"`
Content string `json:"content,omitempty"`
ReasoningContent string `json:"reasoning_content,omitempty"`
} `json:"delta"`
}{
{
Index: 0,
Delta: struct {
Role string `json:"role,omitempty"`
Content string `json:"content,omitempty"`
ReasoningContent string `json:"reasoning_content,omitempty"`
}{
ReasoningContent: reasoningContent,
},
},
},
}
data, _ := json.Marshal(chunk)
return data
}
// createDoneChunk builds a finish chunk with the given reason.
func createDoneChunk(id string, created int64, model string, reason string) []byte {
finishReason := reason
chunk := StreamChunk{
ID: id,
Object: "chat.completion.chunk",
Created: created,
Model: model,
Choices: []struct {
Index int `json:"index"`
FinishReason *string `json:"finish_reason,omitempty"`
Delta struct {
Role string `json:"role,omitempty"`
Content string `json:"content,omitempty"`
ReasoningContent string `json:"reasoning_content,omitempty"`
} `json:"delta"`
}{
{
Index: 0,
FinishReason: &finishReason,
Delta: struct {
Role string `json:"role,omitempty"`
Content string `json:"content,omitempty"`
ReasoningContent string `json:"reasoning_content,omitempty"`
}{},
},
},
}
data, _ := json.Marshal(chunk)
return data
}
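// A finish chunk produced by createDoneChunk marshals to, e.g. (values
// illustrative):
//
//	{"id":"chatcmpl-abc","object":"chat.completion.chunk","created":1700000000,
//	 "model":"hunyuan-t1-latest","choices":[{"index":0,"finish_reason":"stop","delta":{}}]}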
// setCommonHeaders sets the common request headers (mirrors the Python version).
func setCommonHeaders(req *http.Request) {
req.Header.Set("accept", "*/*")
req.Header.Set("accept-language", "zh-CN,zh;q=0.9,en;q=0.8,zh-TW;q=0.7")
req.Header.Set("authorization", "Bearer "+appConfig.BearerToken)
req.Header.Set("dnt", "1")
req.Header.Set("origin", "https://llm.hunyuan.tencent.com")
req.Header.Set("polaris", "stream-server-online-sbs-10697")
req.Header.Set("priority", "u=1, i")
req.Header.Set("referer", "https://llm.hunyuan.tencent.com/")
req.Header.Set("sec-ch-ua", "\"Chromium\";v=\"134\", \"Not:A-Brand\";v=\"24\", \"Google Chrome\";v=\"134\"")
req.Header.Set("sec-ch-ua-mobile", "?0")
req.Header.Set("sec-ch-ua-platform", "\"Windows\"")
req.Header.Set("sec-fetch-dest", "empty")
req.Header.Set("sec-fetch-mode", "cors")
req.Header.Set("sec-fetch-site", "same-origin")
req.Header.Set("staffname", "staryxzhang")
req.Header.Set("wsid", "10697")
req.Header.Set("user-agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/134.0.0.0 Safari/537.36")
}
// generateRequestID generates a request ID for logging.
func generateRequestID() string {
return fmt.Sprintf("%x", time.Now().UnixNano())
}
// getRandomString generates a random lowercase-alphanumeric string. It uses
// math/rand's goroutine-safe global source; the original implementation
// derived every byte from time.Now().UnixNano(), whose resolution is far
// coarser than the 1ns sleep between iterations, so it tended to emit runs
// of identical characters.
func getRandomString(length int) string {
const charset = "abcdefghijklmnopqrstuvwxyz0123456789"
b := make([]byte, length)
for i := range b {
b[i] = charset[rand.Intn(len(charset))]
}
return string(b)
}
// formatMessages renders the messages as a single string.
func formatMessages(messages []APIMessage) string {
var result strings.Builder
for _, msg := range messages {
result.WriteString(msg.Role)
result.WriteString(": ")
result.WriteString(contentToString(msg.Content))
result.WriteString("\n")
}
return result.String()
}
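// For example, formatMessages on a user "hi" followed by an assistant
// "hello" yields:
//
//	user: hi
//	assistant: hello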
// min returns the smaller of two ints.
func min(a, b int) int {
if a < b {
return a
}
return b
}
// max returns the larger of two int64s.
func max(a, b int64) int64 {
if a > b {
return a
}
return b
}