fix: improve sticky session scheduling

This commit is contained in:
shaw
2026-04-30 11:38:11 +08:00
parent 8ad099baa6
commit 733627cf9d
5 changed files with 290 additions and 10 deletions

View File

@@ -262,6 +262,12 @@ func (h *GatewayHandler) Messages(c *gin.Context) {
}
sessionHash := h.gatewayService.GenerateSessionHash(parsedReq)
// [DEBUG-STICKY] 打印会话 hash 生成结果
reqLog.Info("sticky.session_hash_generated",
zap.String("session_hash", sessionHash),
zap.String("metadata_user_id_raw", parsedReq.MetadataUserID),
)
// 获取平台:优先使用强制平台(/antigravity 路由,中间件已设置 request.Context否则使用分组平台
platform := ""
if forcePlatform, ok := middleware2.GetForcePlatformFromContext(c); ok {
@@ -278,6 +284,11 @@ func (h *GatewayHandler) Messages(c *gin.Context) {
var sessionBoundAccountID int64
if sessionKey != "" {
sessionBoundAccountID, _ = h.gatewayService.GetCachedSessionAccountID(c.Request.Context(), apiKey.GroupID, sessionKey)
// [DEBUG-STICKY] 打印粘性会话查询结果
reqLog.Info("sticky.cache_lookup",
zap.String("session_key", sessionKey),
zap.Int64("bound_account_id", sessionBoundAccountID),
)
if sessionBoundAccountID > 0 {
prefetchedGroupID := int64(0)
if apiKey.GroupID != nil {
@@ -286,6 +297,8 @@ func (h *GatewayHandler) Messages(c *gin.Context) {
ctx := service.WithPrefetchedStickySession(c.Request.Context(), sessionBoundAccountID, prefetchedGroupID, h.metadataBridgeEnabled())
c.Request = c.Request.WithContext(ctx)
}
} else {
reqLog.Info("sticky.no_session_key", zap.String("session_hash", sessionHash))
}
// 判断是否真的绑定了粘性会话:有 sessionKey 且已经绑定到某个账号
hasBoundSession := sessionKey != "" && sessionBoundAccountID > 0
@@ -536,6 +549,12 @@ func (h *GatewayHandler) Messages(c *gin.Context) {
for {
// 选择支持该模型的账号
reqLog.Info("sticky.selecting_account",
zap.String("session_key", sessionKey),
zap.Int64("sticky_bound_account_id", sessionBoundAccountID),
zap.Bool("has_bound_session", hasBoundSession),
zap.Int("failed_account_count", len(fs.FailedAccountIDs)),
)
selection, err := h.gatewayService.SelectAccountWithLoadAwareness(c.Request.Context(), currentAPIKey.GroupID, sessionKey, reqModel, fs.FailedAccountIDs, parsedReq.MetadataUserID, subject.UserID)
if err != nil {
if len(fs.FailedAccountIDs) == 0 {
@@ -569,6 +588,16 @@ func (h *GatewayHandler) Messages(c *gin.Context) {
account := selection.Account
setOpsSelectedAccount(c, account.ID, account.Platform)
// [DEBUG-STICKY] 打印账号选择结果
reqLog.Info("sticky.account_selected",
zap.Int64("selected_account_id", account.ID),
zap.String("account_name", account.Name),
zap.Bool("slot_acquired", selection.Acquired),
zap.Bool("has_wait_plan", selection.WaitPlan != nil),
zap.Int64("sticky_bound_account_id", sessionBoundAccountID),
zap.Bool("sticky_honored", sessionBoundAccountID > 0 && sessionBoundAccountID == account.ID),
)
// 检查请求拦截预热请求、SUGGESTION MODE等
if account.IsInterceptWarmupEnabled() {
interceptType := detectInterceptType(body, reqModel, parsedReq.MaxTokens, reqStream, isClaudeCodeClient)
@@ -635,6 +664,10 @@ func (h *GatewayHandler) Messages(c *gin.Context) {
}
// Slot acquired: no longer waiting in queue.
releaseWait()
reqLog.Info("sticky.bind_after_wait",
zap.String("session_key", sessionKey),
zap.Int64("account_id", account.ID),
)
if err := h.gatewayService.BindStickySession(c.Request.Context(), currentAPIKey.GroupID, sessionKey, account.ID); err != nil {
reqLog.Warn("gateway.bind_sticky_session_failed", zap.Int64("account_id", account.ID), zap.Error(err))
}
@@ -829,6 +862,17 @@ func (h *GatewayHandler) Messages(c *gin.Context) {
}
}
// 绑定粘性会话(成功转发后绑定/刷新)
// - 无现有绑定(首次请求):创建绑定
// - 选中账号与粘性账号一致:刷新 TTL
// - 粘性账号因负载/RPM 被跳过、选中了其他账号:不覆盖原绑定,
// 下次请求粘性账号恢复后仍可命中
if sessionKey != "" && (sessionBoundAccountID == 0 || sessionBoundAccountID == account.ID) {
if err := h.gatewayService.BindStickySession(c.Request.Context(), currentAPIKey.GroupID, sessionKey, account.ID); err != nil {
reqLog.Warn("gateway.bind_sticky_session_failed", zap.Int64("account_id", account.ID), zap.Error(err))
}
}
// 捕获请求信息(用于异步记录,避免在 goroutine 中访问 gin.Context
userAgent := c.GetHeader("User-Agent")
clientIP := ip.GetClientIP(c)