backend/internal/repository/concurrency_cache.go

package repository

import (
	"context"
	"errors"
	"fmt"
	"strconv"

	"github.com/Wei-Shaw/sub2api/internal/service"
	"github.com/redis/go-redis/v9"
)

// Constants for the concurrency-control cache.
//
// Performance note:
// The previous implementation used SCAN to walk individual per-slot keys
// (concurrency:account:{id}:{requestID}). Under high concurrency SCAN needs
// several round trips and slows down noticeably when many keys are traversed.
//
// The current implementation uses Redis sorted sets instead:
// 1. One key per account/user; members are request IDs, scores are timestamps.
// 2. ZCARD returns the concurrency count atomically in O(1).
// 3. ZREMRANGEBYSCORE evicts expired slots, so no per-slot TTL is managed by hand.
// 4. Counting completes in a single Redis call, reducing network round trips.
const (
	// Key prefixes for concurrency slots (sorted sets).
	// Format: concurrency:account:{accountID}
	accountSlotKeyPrefix = "concurrency:account:"
	// Format: concurrency:user:{userID}
	userSlotKeyPrefix = "concurrency:user:"
	// Wait-queue counter format: concurrency:wait:{userID}
	waitQueueKeyPrefix = "concurrency:wait:"
	// Account-level wait-queue counter format: wait:account:{accountID}
	accountWaitKeyPrefix = "wait:account:"

	// Default slot expiry in minutes; can be overridden through configuration.
	defaultSlotTTLMinutes = 15
)

var (
	// acquireScript counts the slots in a sorted set and adds a new slot when
	// the limit has not been reached. A request ID that is already a member
	// only has its timestamp refreshed and is reported as acquired.
	// The script reads the clock with the Redis TIME command so that clock skew
	// between application instances does not matter.
	// Returns 1 when the slot is held, 0 when the limit has been reached.
	// KEYS[1] = sorted-set key (concurrency:account:{id} / concurrency:user:{id})
	// ARGV[1] = maxConcurrency
	// ARGV[2] = TTL (seconds)
	// ARGV[3] = requestID
	acquireScript = redis.NewScript(`
		local key = KEYS[1]
		local maxConcurrency = tonumber(ARGV[1])
		local ttl = tonumber(ARGV[2])
		local requestID = ARGV[3]

		-- 使用 Redis 服务器时间，确保多实例时钟一致
		local timeResult = redis.call('TIME')
		local now = tonumber(timeResult[1])
		local expireBefore = now - ttl

		-- 清理过期槽位
		redis.call('ZREMRANGEBYSCORE', key, '-inf', expireBefore)

		-- 检查是否已存在（支持重试场景刷新时间戳）
		local exists = redis.call('ZSCORE', key, requestID)
		if exists ~= false then
			redis.call('ZADD', key, now, requestID)
			redis.call('EXPIRE', key, ttl)
			return 1
		end

		-- 检查是否达到并发上限
		local count = redis.call('ZCARD', key)
		if count < maxConcurrency then
			redis.call('ZADD', key, now, requestID)
			redis.call('EXPIRE', key, ttl)
			return 1
		end

		return 0
	`)

	// getCountScript evicts expired entries from a sorted set and returns the
	// number of remaining slots. The clock is read with the Redis TIME command.
	// KEYS[1] = sorted-set key
	// ARGV[1] = TTL (seconds)
	getCountScript = redis.NewScript(`
		local key = KEYS[1]
		local ttl = tonumber(ARGV[1])

		-- 使用 Redis 服务器时间
		local timeResult = redis.call('TIME')
		local now = tonumber(timeResult[1])
		local expireBefore = now - ttl

		redis.call('ZREMRANGEBYSCORE', key, '-inf', expireBefore)
		return redis.call('ZCARD', key)
	`)

	// incrementWaitScript increments a wait-queue counter unless it has already
	// reached maxWait, and refreshes the key TTL on each successful increment to
	// keep queue depth accurate. Returns 1 on increment, 0 when the queue is full.
	// KEYS[1] = wait queue key
	// ARGV[1] = maxWait
	// ARGV[2] = TTL in seconds
	incrementWaitScript = redis.NewScript(`
		local current = redis.call('GET', KEYS[1])
		if current == false then
			current = 0
		else
			current = tonumber(current)
		end

		if current >= tonumber(ARGV[1]) then
			return 0
		end

		local newVal = redis.call('INCR', KEYS[1])

		-- Refresh TTL so long-running traffic doesn't expire active queue counters.
		redis.call('EXPIRE', KEYS[1], ARGV[2])

			return 1
		`)

	// incrementAccountWaitScript is the account-level counterpart of
	// incrementWaitScript: it increments the counter unless it has reached
	// maxWait and refreshes the TTL on each increment.
	// KEYS[1] = account wait queue key
	// ARGV[1] = maxWait
	// ARGV[2] = TTL in seconds
	incrementAccountWaitScript = redis.NewScript(`
			local current = redis.call('GET', KEYS[1])
			if current == false then
				current = 0
			else
				current = tonumber(current)
			end

			if current >= tonumber(ARGV[1]) then
				return 0
			end

			local newVal = redis.call('INCR', KEYS[1])

			-- Refresh TTL so long-running traffic doesn't expire active queue counters.
			redis.call('EXPIRE', KEYS[1], ARGV[2])

			return 1
		`)

	// decrementWaitScript decrements the counter at KEYS[1] only when the key
	// exists and its value is greater than zero, so the counter never goes
	// negative. It always returns 1.
	decrementWaitScript = redis.NewScript(`
			local current = redis.call('GET', KEYS[1])
			if current ~= false and tonumber(current) > 0 then
				redis.call('DECR', KEYS[1])
			end
			return 1
		`)

	// cleanupExpiredSlotsScript evicts expired slots from a single account/user
	// sorted set. The key is deleted when it ends up empty; otherwise its TTL is
	// refreshed.
	// KEYS[1] = sorted-set key
	// ARGV[1] = TTL (seconds)
	cleanupExpiredSlotsScript = redis.NewScript(`
		local key = KEYS[1]
		local ttl = tonumber(ARGV[1])
		local timeResult = redis.call('TIME')
		local now = tonumber(timeResult[1])
		local expireBefore = now - ttl
		redis.call('ZREMRANGEBYSCORE', key, '-inf', expireBefore)
		if redis.call('ZCARD', key) == 0 then
			redis.call('DEL', key)
		else
			redis.call('EXPIRE', key, ttl)
		end
		return 1
	`)

	// startupCleanupScript removes slot members that do not carry the current
	// process prefix.
	// KEYS is the list of sorted-set keys, ARGV[1] is the current process prefix
	// and ARGV[2] is the slot TTL.
	// For each KEYS[i] it removes the members whose prefix does not match, then
	// deletes the key if it became empty or refreshes its EXPIRE otherwise.
	// Returns the number of members removed.
	startupCleanupScript = redis.NewScript(`
		local activePrefix = ARGV[1]
		local slotTTL = tonumber(ARGV[2])
		local removed = 0
		for i = 1, #KEYS do
			local key = KEYS[i]
			local members = redis.call('ZRANGE', key, 0, -1)
			for _, member in ipairs(members) do
				if string.sub(member, 1, string.len(activePrefix)) ~= activePrefix then
					removed = removed + redis.call('ZREM', key, member)
				end
			end
			if redis.call('ZCARD', key) == 0 then
				redis.call('DEL', key)
			else
				redis.call('EXPIRE', key, slotTTL)
			end
		end
		return removed
	`)
)

// concurrencyCache bundles the Redis client with the TTL settings used by the
// slot and wait-queue operations defined on it.
type concurrencyCache struct {
	rdb                 *redis.Client
	slotTTLSeconds      int // slot expiry, in seconds
	waitQueueTTLSeconds int // wait-queue counter expiry, in seconds
}

// NewConcurrencyCache builds the Redis-backed concurrency cache.
//
// slotTTLMinutes is the slot expiry in minutes; zero or a negative value
// selects the default of 15 minutes. waitQueueTTLSeconds is the wait-queue
// counter expiry in seconds; zero or a negative value falls back to the slot
// TTL.
func NewConcurrencyCache(rdb *redis.Client, slotTTLMinutes int, waitQueueTTLSeconds int) service.ConcurrencyCache {
	if slotTTLMinutes <= 0 {
		slotTTLMinutes = defaultSlotTTLMinutes
	}
	slotSeconds := slotTTLMinutes * 60

	waitSeconds := waitQueueTTLSeconds
	if waitSeconds <= 0 {
		waitSeconds = slotSeconds
	}

	cache := &concurrencyCache{rdb: rdb}
	cache.slotTTLSeconds = slotSeconds
	cache.waitQueueTTLSeconds = waitSeconds
	return cache
}

// Helper functions for key generation
func accountSlotKey(accountID int64) string {
	return fmt.Sprintf("%s%d", accountSlotKeyPrefix, accountID)
}

func userSlotKey(userID int64) string {
	return fmt.Sprintf("%s%d", userSlotKeyPrefix, userID)
}

func waitQueueKey(userID int64) string {
	return fmt.Sprintf("%s%d", waitQueueKeyPrefix, userID)
}

func accountWaitKey(accountID int64) string {
	return fmt.Sprintf("%s%d", accountWaitKeyPrefix, accountID)
}

// Account slot operations

// AcquireAccountSlot tries to take a concurrency slot for requestID on the
// given account by running acquireScript against the account's sorted set.
// It reports true when the script returns 1, false otherwise; any script
// error is returned unchanged.
func (c *concurrencyCache) AcquireAccountSlot(ctx context.Context, accountID int64, maxConcurrency int, requestID string) (bool, error) {
	keys := []string{accountSlotKey(accountID)}
	// The timestamp comes from the Redis TIME command inside the Lua script,
	// so all instances share the same clock.
	granted, err := acquireScript.Run(ctx, c.rdb, keys, maxConcurrency, c.slotTTLSeconds, requestID).Int()
	if err != nil {
		return false, err
	}
	return granted == 1, nil
}

// ReleaseAccountSlot removes requestID from the account's slot sorted set
// with ZREM and returns the command error, if any.
func (c *concurrencyCache) ReleaseAccountSlot(ctx context.Context, accountID int64, requestID string) error {
	removal := c.rdb.ZRem(ctx, accountSlotKey(accountID), requestID)
	return removal.Err()
}

// GetAccountConcurrency returns the number of slots currently held on the
// account. It runs getCountScript, which evicts expired entries before
// counting. On error the count is 0 and the error is returned.
func (c *concurrencyCache) GetAccountConcurrency(ctx context.Context, accountID int64) (int, error) {
	keys := []string{accountSlotKey(accountID)}
	// The timestamp comes from the Redis TIME command inside the Lua script.
	active, err := getCountScript.Run(ctx, c.rdb, keys, c.slotTTLSeconds).Int()
	if err != nil {
		return 0, err
	}
	return active, nil
}

// GetAccountConcurrencyBatch returns the current slot count for every account
// in accountIDs, keyed by account ID. An empty input yields an empty map.
//
// It reads the Redis server time once, then pipelines a ZREMRANGEBYSCORE
// (evict expired slots) and a ZCARD (count) per account.
func (c *concurrencyCache) GetAccountConcurrencyBatch(ctx context.Context, accountIDs []int64) (map[int64]int, error) {
	counts := make(map[int64]int, len(accountIDs))
	if len(accountIDs) == 0 {
		return counts, nil
	}

	serverNow, err := c.rdb.Time(ctx).Result()
	if err != nil {
		return nil, fmt.Errorf("redis TIME: %w", err)
	}
	// Slots scored at or below this value are considered expired.
	maxScore := strconv.FormatInt(serverNow.Unix()-int64(c.slotTTLSeconds), 10)

	pipe := c.rdb.Pipeline()
	// pending[i] is the ZCARD command queued for accountIDs[i].
	pending := make([]*redis.IntCmd, len(accountIDs))
	for i, id := range accountIDs {
		key := accountSlotKeyPrefix + strconv.FormatInt(id, 10)
		pipe.ZRemRangeByScore(ctx, key, "-inf", maxScore)
		pending[i] = pipe.ZCard(ctx, key)
	}

	_, err = pipe.Exec(ctx)
	if err != nil && !errors.Is(err, redis.Nil) {
		return nil, fmt.Errorf("pipeline exec: %w", err)
	}

	for i, id := range accountIDs {
		counts[id] = int(pending[i].Val())
	}
	return counts, nil
}

// User slot operations

// AcquireUserSlot tries to take a concurrency slot for requestID on the given
// user by running acquireScript against the user's sorted set. It reports
// true when the script returns 1, false otherwise; any script error is
// returned unchanged.
func (c *concurrencyCache) AcquireUserSlot(ctx context.Context, userID int64, maxConcurrency int, requestID string) (bool, error) {
	keys := []string{userSlotKey(userID)}
	// The timestamp comes from the Redis TIME command inside the Lua script,
	// so all instances share the same clock.
	granted, err := acquireScript.Run(ctx, c.rdb, keys, maxConcurrency, c.slotTTLSeconds, requestID).Int()
	if err != nil {
		return false, err
	}
	return granted == 1, nil
}

// ReleaseUserSlot removes requestID from the user's slot sorted set with ZREM
// and returns the command error, if any.
func (c *concurrencyCache) ReleaseUserSlot(ctx context.Context, userID int64, requestID string) error {
	removal := c.rdb.ZRem(ctx, userSlotKey(userID), requestID)
	return removal.Err()
}

// GetUserConcurrency returns the number of slots currently held by the user.
// It runs getCountScript, which evicts expired entries before counting.
// On error the count is 0 and the error is returned.
func (c *concurrencyCache) GetUserConcurrency(ctx context.Context, userID int64) (int, error) {
	keys := []string{userSlotKey(userID)}
	// The timestamp comes from the Redis TIME command inside the Lua script.
	active, err := getCountScript.Run(ctx, c.rdb, keys, c.slotTTLSeconds).Int()
	if err != nil {
		return 0, err
	}
	return active, nil
}

// Wait queue operations

// IncrementWaitCount runs incrementWaitScript on the user's wait-queue
// counter with maxWait as the limit and the configured wait-queue TTL.
// It reports true when the script returns 1 (counter incremented) and false
// otherwise; any script error is returned unchanged.
func (c *concurrencyCache) IncrementWaitCount(ctx context.Context, userID int64, maxWait int) (bool, error) {
	keys := []string{waitQueueKey(userID)}
	admitted, err := incrementWaitScript.Run(ctx, c.rdb, keys, maxWait, c.waitQueueTTLSeconds).Int()
	if err != nil {
		return false, err
	}
	return admitted == 1, nil
}

// DecrementWaitCount runs decrementWaitScript on the user's wait-queue
// counter and returns the script error, if any. The script result itself is
// discarded.
func (c *concurrencyCache) DecrementWaitCount(ctx context.Context, userID int64) error {
	keys := []string{waitQueueKey(userID)}
	return decrementWaitScript.Run(ctx, c.rdb, keys).Err()
}

// Account wait queue operations

// IncrementAccountWaitCount runs incrementAccountWaitScript on the account's
// wait-queue counter with maxWait as the limit and the configured wait-queue
// TTL. It reports true when the script returns 1 (counter incremented) and
// false otherwise; any script error is returned unchanged.
func (c *concurrencyCache) IncrementAccountWaitCount(ctx context.Context, accountID int64, maxWait int) (bool, error) {
	keys := []string{accountWaitKey(accountID)}
	admitted, err := incrementAccountWaitScript.Run(ctx, c.rdb, keys, maxWait, c.waitQueueTTLSeconds).Int()
	if err != nil {
		return false, err
	}
	return admitted == 1, nil
}

// DecrementAccountWaitCount runs decrementWaitScript on the account's
// wait-queue counter and returns the script error, if any. The script result
// itself is discarded.
func (c *concurrencyCache) DecrementAccountWaitCount(ctx context.Context, accountID int64) error {
	keys := []string{accountWaitKey(accountID)}
	return decrementWaitScript.Run(ctx, c.rdb, keys).Err()
}

// GetAccountWaitingCount reads the account's wait-queue counter with GET.
// A missing key (redis.Nil) is reported as 0 with no error; any other error
// is returned with a count of 0.
func (c *concurrencyCache) GetAccountWaitingCount(ctx context.Context, accountID int64) (int, error) {
	waiting, err := c.rdb.Get(ctx, accountWaitKey(accountID)).Int()
	switch {
	case err == nil:
		return waiting, nil
	case errors.Is(err, redis.Nil):
		// No counter stored means nobody is waiting.
		return 0, nil
	default:
		return 0, err
	}
}

// GetAccountsLoadBatch reports, for every account in accounts, the current
// slot count, the wait-queue count and the resulting load rate, keyed by
// account ID. An empty input yields an empty map.
//
// LoadRate is (CurrentConcurrency + WaitingCount) * 100 / MaxConcurrency using
// integer division, or 0 when MaxConcurrency is not positive.
//
// An error is returned when reading the Redis time fails, when the pipeline
// fails, or when any individual ZCARD/GET command fails. A missing wait
// counter (redis.Nil) is not an error and counts as 0 waiting.
func (c *concurrencyCache) GetAccountsLoadBatch(ctx context.Context, accounts []service.AccountWithConcurrency) (map[int64]*service.AccountLoadInfo, error) {
	if len(accounts) == 0 {
		return map[int64]*service.AccountLoadInfo{}, nil
	}

	// A pipeline is used instead of a Lua script to stay compatible with Redis
	// Cluster (building keys dynamically inside Lua triggers CROSSSLOT).
	// Three commands run per account: ZREMRANGEBYSCORE (evict expired slots),
	// ZCARD (concurrency) and GET (waiting count).
	now, err := c.rdb.Time(ctx).Result()
	if err != nil {
		return nil, fmt.Errorf("redis TIME: %w", err)
	}
	// The cutoff score is the same for every account, so format it once.
	cutoff := strconv.FormatInt(now.Unix()-int64(c.slotTTLSeconds), 10)

	pipe := c.rdb.Pipeline()

	type accountCmds struct {
		id             int64
		maxConcurrency int
		zcardCmd       *redis.IntCmd
		getCmd         *redis.StringCmd
	}
	cmds := make([]accountCmds, 0, len(accounts))
	for _, acc := range accounts {
		idStr := strconv.FormatInt(acc.ID, 10)
		slotKey := accountSlotKeyPrefix + idStr
		waitKey := accountWaitKeyPrefix + idStr
		pipe.ZRemRangeByScore(ctx, slotKey, "-inf", cutoff)
		cmds = append(cmds, accountCmds{
			id:             acc.ID,
			maxConcurrency: acc.MaxConcurrency,
			zcardCmd:       pipe.ZCard(ctx, slotKey),
			getCmd:         pipe.Get(ctx, waitKey),
		})
	}

	if _, err := pipe.Exec(ctx); err != nil && !errors.Is(err, redis.Nil) {
		return nil, fmt.Errorf("pipeline exec: %w", err)
	}

	loadMap := make(map[int64]*service.AccountLoadInfo, len(accounts))
	for _, ac := range cmds {
		// Exec only surfaces the first command error. When that first error is
		// a redis.Nil from an absent wait counter, a later real failure would
		// go unnoticed and be reported as zero load, so every command is
		// checked individually here.
		slotCount, err := ac.zcardCmd.Result()
		if err != nil {
			return nil, fmt.Errorf("zcard account %d: %w", ac.id, err)
		}
		currentConcurrency := int(slotCount)

		waitingCount, err := ac.getCmd.Int()
		switch {
		case err == nil:
		case errors.Is(err, redis.Nil):
			// No counter stored means nobody is waiting.
			waitingCount = 0
		default:
			return nil, fmt.Errorf("get wait count account %d: %w", ac.id, err)
		}

		loadRate := 0
		if ac.maxConcurrency > 0 {
			loadRate = (currentConcurrency + waitingCount) * 100 / ac.maxConcurrency
		}
		loadMap[ac.id] = &service.AccountLoadInfo{
			AccountID:          ac.id,
			CurrentConcurrency: currentConcurrency,
			WaitingCount:       waitingCount,
			LoadRate:           loadRate,
		}
	}

	return loadMap, nil
}

// GetUsersLoadBatch reports, for every user in users, the current slot count,
// the wait-queue count and the resulting load rate, keyed by user ID.
// An empty input yields an empty map.
//
// LoadRate is (CurrentConcurrency + WaitingCount) * 100 / MaxConcurrency using
// integer division, or 0 when MaxConcurrency is not positive.
//
// An error is returned when reading the Redis time fails, when the pipeline
// fails, or when any individual ZCARD/GET command fails. A missing wait
// counter (redis.Nil) is not an error and counts as 0 waiting.
func (c *concurrencyCache) GetUsersLoadBatch(ctx context.Context, users []service.UserWithConcurrency) (map[int64]*service.UserLoadInfo, error) {
	if len(users) == 0 {
		return map[int64]*service.UserLoadInfo{}, nil
	}

	// A pipeline is used instead of a Lua script to stay compatible with Redis
	// Cluster.
	now, err := c.rdb.Time(ctx).Result()
	if err != nil {
		return nil, fmt.Errorf("redis TIME: %w", err)
	}
	// The cutoff score is the same for every user, so format it once.
	cutoff := strconv.FormatInt(now.Unix()-int64(c.slotTTLSeconds), 10)

	pipe := c.rdb.Pipeline()

	type userCmds struct {
		id             int64
		maxConcurrency int
		zcardCmd       *redis.IntCmd
		getCmd         *redis.StringCmd
	}
	cmds := make([]userCmds, 0, len(users))
	for _, u := range users {
		idStr := strconv.FormatInt(u.ID, 10)
		slotKey := userSlotKeyPrefix + idStr
		waitKey := waitQueueKeyPrefix + idStr
		pipe.ZRemRangeByScore(ctx, slotKey, "-inf", cutoff)
		cmds = append(cmds, userCmds{
			id:             u.ID,
			maxConcurrency: u.MaxConcurrency,
			zcardCmd:       pipe.ZCard(ctx, slotKey),
			getCmd:         pipe.Get(ctx, waitKey),
		})
	}

	if _, err := pipe.Exec(ctx); err != nil && !errors.Is(err, redis.Nil) {
		return nil, fmt.Errorf("pipeline exec: %w", err)
	}

	loadMap := make(map[int64]*service.UserLoadInfo, len(users))
	for _, uc := range cmds {
		// Exec only surfaces the first command error. When that first error is
		// a redis.Nil from an absent wait counter, a later real failure would
		// go unnoticed and be reported as zero load, so every command is
		// checked individually here.
		slotCount, err := uc.zcardCmd.Result()
		if err != nil {
			return nil, fmt.Errorf("zcard user %d: %w", uc.id, err)
		}
		currentConcurrency := int(slotCount)

		waitingCount, err := uc.getCmd.Int()
		switch {
		case err == nil:
		case errors.Is(err, redis.Nil):
			// No counter stored means nobody is waiting.
			waitingCount = 0
		default:
			return nil, fmt.Errorf("get wait count user %d: %w", uc.id, err)
		}

		loadRate := 0
		if uc.maxConcurrency > 0 {
			loadRate = (currentConcurrency + waitingCount) * 100 / uc.maxConcurrency
		}
		loadMap[uc.id] = &service.UserLoadInfo{
			UserID:             uc.id,
			CurrentConcurrency: currentConcurrency,
			WaitingCount:       waitingCount,
			LoadRate:           loadRate,
		}
	}

	return loadMap, nil
}

// CleanupExpiredAccountSlots runs cleanupExpiredSlotsScript on the account's
// slot sorted set with the configured slot TTL and returns the script error,
// if any. The script result itself is discarded.
func (c *concurrencyCache) CleanupExpiredAccountSlots(ctx context.Context, accountID int64) error {
	keys := []string{accountSlotKey(accountID)}
	return cleanupExpiredSlotsScript.Run(ctx, c.rdb, keys, c.slotTTLSeconds).Err()
}

// CleanupStaleProcessSlots removes slot members that do not start with
// activeRequestPrefix from all account and user slot sets, then deletes every
// wait-queue counter. It is a no-op when activeRequestPrefix is empty and
// stops at the first error.
func (c *concurrencyCache) CleanupStaleProcessSlots(ctx context.Context, activeRequestPrefix string) error {
	if activeRequestPrefix == "" {
		return nil
	}

	// Step 1: drop sorted-set members that do not carry the current process prefix.
	for _, prefix := range [...]string{accountSlotKeyPrefix, userSlotKeyPrefix} {
		if err := c.cleanupSlotsByPattern(ctx, prefix+"*", activeRequestPrefix); err != nil {
			return err
		}
	}

	// Step 2: delete all wait-queue counters (they are invalid after a restart).
	for _, prefix := range [...]string{accountWaitKeyPrefix, waitQueueKeyPrefix} {
		if err := c.deleteKeysByPattern(ctx, prefix+"*"); err != nil {
			return err
		}
	}

	return nil
}

// cleanupSlotsByPattern scans for sorted-set keys matching pattern and, for
// each batch of keys returned by SCAN, runs startupCleanupScript to remove the
// members that do not belong to the current process. It stops at the first
// scan or script error, wrapping it with the pattern.
func (c *concurrencyCache) cleanupSlotsByPattern(ctx context.Context, pattern, activePrefix string) error {
	const batchSize = 200

	cursor := uint64(0)
	for {
		batch, next, err := c.rdb.Scan(ctx, cursor, pattern, batchSize).Result()
		if err != nil {
			return fmt.Errorf("scan %s: %w", pattern, err)
		}
		if len(batch) != 0 {
			runErr := startupCleanupScript.Run(ctx, c.rdb, batch, activePrefix, c.slotTTLSeconds).Err()
			if runErr != nil {
				return fmt.Errorf("cleanup slots %s: %w", pattern, runErr)
			}
		}
		// A zero cursor from SCAN marks the end of the iteration.
		if next == 0 {
			return nil
		}
		cursor = next
	}
}

// deleteKeysByPattern scans for keys matching pattern and deletes each batch
// of keys returned by SCAN. It stops at the first scan or delete error,
// wrapping it with the pattern.
func (c *concurrencyCache) deleteKeysByPattern(ctx context.Context, pattern string) error {
	const batchSize = 200

	cursor := uint64(0)
	for {
		batch, next, err := c.rdb.Scan(ctx, cursor, pattern, batchSize).Result()
		if err != nil {
			return fmt.Errorf("scan %s: %w", pattern, err)
		}
		if len(batch) != 0 {
			delErr := c.rdb.Del(ctx, batch...).Err()
			if delErr != nil {
				return fmt.Errorf("del %s: %w", pattern, delErr)
			}
		}
		// A zero cursor from SCAN marks the end of the iteration.
		if next == 0 {
			return nil
		}
		cursor = next
	}
}
-												refactor(backend): 添加 service 缓存端口

											
										
										
											2025-12-19 23:39:28 +08:00
+								package repository
 								import (
 									"context"
-												feat(gateway): 实现负载感知的账号调度优化

- 新增调度配置：粘性会话排队、兜底排队、负载计算、槽位清理
- 实现账号级等待队列和批量负载查询（Redis Lua 脚本）
- 三层选择策略：粘性会话优先 → 负载感知选择 → 兜底排队
- 后台定期清理过期槽位，防止资源泄漏
- 集成到所有网关处理器（Claude/Gemini/OpenAI）

											
										
										
											2026-01-01 04:01:51 +08:00
+									"errors"
-												refactor(backend): 添加 service 缓存端口

											
										
										
											2025-12-19 23:39:28 +08:00
+									"fmt"
-												feat(gateway): 实现负载感知的账号调度优化

- 新增调度配置：粘性会话排队、兜底排队、负载计算、槽位清理
- 实现账号级等待队列和批量负载查询（Redis Lua 脚本）
- 三层选择策略：粘性会话优先 → 负载感知选择 → 兜底排队
- 后台定期清理过期槽位，防止资源泄漏
- 集成到所有网关处理器（Claude/Gemini/OpenAI）

											
										
										
											2026-01-01 04:01:51 +08:00
+									"strconv"
-												refactor(backend): 添加 service 缓存端口

											
										
										
											2025-12-19 23:39:28 +08:00
-												refactor: 删除 ports 目录

											
										
										
											2025-12-25 17:15:01 +08:00
+									"github.com/Wei-Shaw/sub2api/internal/service"
-												refactor(backend): 添加 service 缓存端口

											
										
										
											2025-12-19 23:39:28 +08:00
+									"github.com/redis/go-redis/v9"
 								)
-												perf(后端): 完成性能优化与连接池配置

新增 DB/Redis 连接池配置与校验，并补充单测

网关请求体大小限制与 413 处理

HTTP/req 客户端池化并调整上游连接池默认值

并发槽位改为 ZSET+Lua 与指数退避

用量统计改 SQL 聚合并新增索引迁移

计费缓存写入改工作池并补测试/基准

测试: 在 backend/ 下运行 go test ./...

											
										
										
											2025-12-31 08:50:12 +08:00
+								// 并发控制缓存常量定义
 								//
 								// 性能优化说明：
 								// 原实现使用 SCAN 命令遍历独立的槽位键（concurrency:account:{id}:{requestID}），
 								// 在高并发场景下 SCAN 需要多次往返，且遍历大量键时性能下降明显。
 								//
 								// 新实现改用 Redis 有序集合（Sorted Set）：
 								// 1. 每个账号/用户只有一个键，成员为 requestID，分数为时间戳
 								// 2. 使用 ZCARD 原子获取并发数，时间复杂度 O(1)
 								// 3. 使用 ZREMRANGEBYSCORE 清理过期槽位，避免手动管理 TTL
 								// 4. 单次 Redis 调用完成计数，减少网络往返
-												refactor(backend): 添加 service 缓存端口

											
										
										
											2025-12-19 23:39:28 +08:00
+								const (
-												perf(后端): 完成性能优化与连接池配置

新增 DB/Redis 连接池配置与校验，并补充单测

网关请求体大小限制与 413 处理

HTTP/req 客户端池化并调整上游连接池默认值

并发槽位改为 ZSET+Lua 与指数退避

用量统计改 SQL 聚合并新增索引迁移

计费缓存写入改工作池并补测试/基准

测试: 在 backend/ 下运行 go test ./...

											
										
										
											2025-12-31 08:50:12 +08:00
+									// 并发槽位键前缀（有序集合）
 									// 格式: concurrency:account:{accountID}
-												fix(concurrency): 重构并发管理使用独立Key+原生TTL

问题：旧方案使用计数器模式，每次acquire都刷新TTL，导致僵尸数据永不过期

解决方案：
- 每个槽位使用独立Redis Key: concurrency:account:{id}:{requestID}
- 利用Redis原生TTL，每个槽位独立5分钟过期
- 服务崩溃后僵尸数据自动清理，无需手动干预
- 兼容多实例K8s部署

技术改动：
- 新增SCAN脚本统计活跃槽位数量
- 移除冗余的releaseScript，直接使用DEL命令
- Wait队列TTL只在首次创建时设置，避免刷新

											
										
										
											2025-12-24 21:00:29 +08:00
+									accountSlotKeyPrefix = "concurrency:account:"
-												perf(后端): 完成性能优化与连接池配置

新增 DB/Redis 连接池配置与校验，并补充单测

网关请求体大小限制与 413 处理

HTTP/req 客户端池化并调整上游连接池默认值

并发槽位改为 ZSET+Lua 与指数退避

用量统计改 SQL 聚合并新增索引迁移

计费缓存写入改工作池并补测试/基准

测试: 在 backend/ 下运行 go test ./...

											
										
										
											2025-12-31 08:50:12 +08:00
+									// 格式: concurrency:user:{userID}
-												fix(concurrency): 重构并发管理使用独立Key+原生TTL

问题：旧方案使用计数器模式，每次acquire都刷新TTL，导致僵尸数据永不过期

解决方案：
- 每个槽位使用独立Redis Key: concurrency:account:{id}:{requestID}
- 利用Redis原生TTL，每个槽位独立5分钟过期
- 服务崩溃后僵尸数据自动清理，无需手动干预
- 兼容多实例K8s部署

技术改动：
- 新增SCAN脚本统计活跃槽位数量
- 移除冗余的releaseScript，直接使用DEL命令
- Wait队列TTL只在首次创建时设置，避免刷新

											
										
										
											2025-12-24 21:00:29 +08:00
+									userSlotKeyPrefix = "concurrency:user:"
-												perf(gateway): 优化负载感知调度

主要改进：
- 优化负载感知调度的准确性和响应速度
- 将 AccountUsageService 的包级缓存改为依赖注入
- 修复 SSE/JSON 转义和 nil 安全问题
- 恢复 Google One 功能兼容性

											
										
										
											2026-01-03 06:32:51 -08:00
+									// 等待队列计数器格式: concurrency:wait:{userID}
 									waitQueueKeyPrefix = "concurrency:wait:"
-												feat(gateway): 实现负载感知的账号调度优化

- 新增调度配置：粘性会话排队、兜底排队、负载计算、槽位清理
- 实现账号级等待队列和批量负载查询（Redis Lua 脚本）
- 三层选择策略：粘性会话优先 → 负载感知选择 → 兜底排队
- 后台定期清理过期槽位，防止资源泄漏
- 集成到所有网关处理器（Claude/Gemini/OpenAI）

											
										
										
											2026-01-01 04:01:51 +08:00
+									// 账号级等待队列计数器格式: wait:account:{accountID}
 									accountWaitKeyPrefix = "wait:account:"
-												fix(concurrency): 重构并发管理使用独立Key+原生TTL

问题：旧方案使用计数器模式，每次acquire都刷新TTL，导致僵尸数据永不过期

解决方案：
- 每个槽位使用独立Redis Key: concurrency:account:{id}:{requestID}
- 利用Redis原生TTL，每个槽位独立5分钟过期
- 服务崩溃后僵尸数据自动清理，无需手动干预
- 兼容多实例K8s部署

技术改动：
- 新增SCAN脚本统计活跃槽位数量
- 移除冗余的releaseScript，直接使用DEL命令
- Wait队列TTL只在首次创建时设置，避免刷新

											
										
										
											2025-12-24 21:00:29 +08:00
-												perf(后端): 完成性能优化与连接池配置

新增 DB/Redis 连接池配置与校验，并补充单测

网关请求体大小限制与 413 处理

HTTP/req 客户端池化并调整上游连接池默认值

并发槽位改为 ZSET+Lua 与指数退避

用量统计改 SQL 聚合并新增索引迁移

计费缓存写入改工作池并补测试/基准

测试: 在 backend/ 下运行 go test ./...

											
										
										
											2025-12-31 08:50:12 +08:00
+									// 默认槽位过期时间（分钟），可通过配置覆盖
 									defaultSlotTTLMinutes = 15
-												refactor(backend): 添加 service 缓存端口

											
										
										
											2025-12-19 23:39:28 +08:00
+								)
 								var (
-												perf(后端): 完成性能优化与连接池配置

新增 DB/Redis 连接池配置与校验，并补充单测

网关请求体大小限制与 413 处理

HTTP/req 客户端池化并调整上游连接池默认值

并发槽位改为 ZSET+Lua 与指数退避

用量统计改 SQL 聚合并新增索引迁移

计费缓存写入改工作池并补测试/基准

测试: 在 backend/ 下运行 go test ./...

											
										
										
											2025-12-31 08:50:12 +08:00
+									// acquireScript 使用有序集合计数并在未达上限时添加槽位
 									// 使用 Redis TIME 命令获取服务器时间，避免多实例时钟不同步问题
 									// KEYS[1] = 有序集合键 (concurrency:account:{id} / concurrency:user:{id})
-												fix(concurrency): 重构并发管理使用独立Key+原生TTL

问题：旧方案使用计数器模式，每次acquire都刷新TTL，导致僵尸数据永不过期

解决方案：
- 每个槽位使用独立Redis Key: concurrency:account:{id}:{requestID}
- 利用Redis原生TTL，每个槽位独立5分钟过期
- 服务崩溃后僵尸数据自动清理，无需手动干预
- 兼容多实例K8s部署

技术改动：
- 新增SCAN脚本统计活跃槽位数量
- 移除冗余的releaseScript，直接使用DEL命令
- Wait队列TTL只在首次创建时设置，避免刷新

											
										
										
											2025-12-24 21:00:29 +08:00
+									// ARGV[1] = maxConcurrency
-												perf(后端): 完成性能优化与连接池配置

新增 DB/Redis 连接池配置与校验，并补充单测

网关请求体大小限制与 413 处理

HTTP/req 客户端池化并调整上游连接池默认值

并发槽位改为 ZSET+Lua 与指数退避

用量统计改 SQL 聚合并新增索引迁移

计费缓存写入改工作池并补测试/基准

测试: 在 backend/ 下运行 go test ./...

											
										
										
											2025-12-31 08:50:12 +08:00
+									// ARGV[2] = TTL（秒）
 									// ARGV[3] = requestID
-												refactor(backend): 添加 service 缓存端口

											
										
										
											2025-12-19 23:39:28 +08:00
+									acquireScript = redis.NewScript(`
-												perf(后端): 完成性能优化与连接池配置

新增 DB/Redis 连接池配置与校验，并补充单测

网关请求体大小限制与 413 处理

HTTP/req 客户端池化并调整上游连接池默认值

并发槽位改为 ZSET+Lua 与指数退避

用量统计改 SQL 聚合并新增索引迁移

计费缓存写入改工作池并补测试/基准

测试: 在 backend/ 下运行 go test ./...

											
										
										
											2025-12-31 08:50:12 +08:00
+										local key = KEYS[1]
-												fix(concurrency): 重构并发管理使用独立Key+原生TTL

问题：旧方案使用计数器模式，每次acquire都刷新TTL，导致僵尸数据永不过期

解决方案：
- 每个槽位使用独立Redis Key: concurrency:account:{id}:{requestID}
- 利用Redis原生TTL，每个槽位独立5分钟过期
- 服务崩溃后僵尸数据自动清理，无需手动干预
- 兼容多实例K8s部署

技术改动：
- 新增SCAN脚本统计活跃槽位数量
- 移除冗余的releaseScript，直接使用DEL命令
- Wait队列TTL只在首次创建时设置，避免刷新

											
										
										
											2025-12-24 21:00:29 +08:00
+										local maxConcurrency = tonumber(ARGV[1])
 										local ttl = tonumber(ARGV[2])
-												perf(后端): 完成性能优化与连接池配置

新增 DB/Redis 连接池配置与校验，并补充单测

网关请求体大小限制与 413 处理

HTTP/req 客户端池化并调整上游连接池默认值

并发槽位改为 ZSET+Lua 与指数退避

用量统计改 SQL 聚合并新增索引迁移

计费缓存写入改工作池并补测试/基准

测试: 在 backend/ 下运行 go test ./...

											
										
										
											2025-12-31 08:50:12 +08:00
+										local requestID = ARGV[3]
 										-- 使用 Redis 服务器时间，确保多实例时钟一致
 										local timeResult = redis.call('TIME')
 										local now = tonumber(timeResult[1])
 										local expireBefore = now - ttl
-												fix(concurrency): 重构并发管理使用独立Key+原生TTL

问题：旧方案使用计数器模式，每次acquire都刷新TTL，导致僵尸数据永不过期

解决方案：
- 每个槽位使用独立Redis Key: concurrency:account:{id}:{requestID}
- 利用Redis原生TTL，每个槽位独立5分钟过期
- 服务崩溃后僵尸数据自动清理，无需手动干预
- 兼容多实例K8s部署

技术改动：
- 新增SCAN脚本统计活跃槽位数量
- 移除冗余的releaseScript，直接使用DEL命令
- Wait队列TTL只在首次创建时设置，避免刷新

											
										
										
											2025-12-24 21:00:29 +08:00
-												perf(后端): 完成性能优化与连接池配置

新增 DB/Redis 连接池配置与校验，并补充单测

网关请求体大小限制与 413 处理

HTTP/req 客户端池化并调整上游连接池默认值

并发槽位改为 ZSET+Lua 与指数退避

用量统计改 SQL 聚合并新增索引迁移

计费缓存写入改工作池并补测试/基准

测试: 在 backend/ 下运行 go test ./...

											
										
										
											2025-12-31 08:50:12 +08:00
+										-- 清理过期槽位
 										redis.call('ZREMRANGEBYSCORE', key, '-inf', expireBefore)
 										-- 检查是否已存在（支持重试场景刷新时间戳）
 										local exists = redis.call('ZSCORE', key, requestID)
 										if exists ~= false then
 											redis.call('ZADD', key, now, requestID)
 											redis.call('EXPIRE', key, ttl)
 											return 1
 										end
-												fix(concurrency): 重构并发管理使用独立Key+原生TTL

问题：旧方案使用计数器模式，每次acquire都刷新TTL，导致僵尸数据永不过期

解决方案：
- 每个槽位使用独立Redis Key: concurrency:account:{id}:{requestID}
- 利用Redis原生TTL，每个槽位独立5分钟过期
- 服务崩溃后僵尸数据自动清理，无需手动干预
- 兼容多实例K8s部署

技术改动：
- 新增SCAN脚本统计活跃槽位数量
- 移除冗余的releaseScript，直接使用DEL命令
- Wait队列TTL只在首次创建时设置，避免刷新

											
										
										
											2025-12-24 21:00:29 +08:00
-												perf(后端): 完成性能优化与连接池配置

新增 DB/Redis 连接池配置与校验，并补充单测

网关请求体大小限制与 413 处理

HTTP/req 客户端池化并调整上游连接池默认值

并发槽位改为 ZSET+Lua 与指数退避

用量统计改 SQL 聚合并新增索引迁移

计费缓存写入改工作池并补测试/基准

测试: 在 backend/ 下运行 go test ./...

											
										
										
											2025-12-31 08:50:12 +08:00
+										-- 检查是否达到并发上限
 										local count = redis.call('ZCARD', key)
-												fix(concurrency): 重构并发管理使用独立Key+原生TTL

问题：旧方案使用计数器模式，每次acquire都刷新TTL，导致僵尸数据永不过期

解决方案：
- 每个槽位使用独立Redis Key: concurrency:account:{id}:{requestID}
- 利用Redis原生TTL，每个槽位独立5分钟过期
- 服务崩溃后僵尸数据自动清理，无需手动干预
- 兼容多实例K8s部署

技术改动：
- 新增SCAN脚本统计活跃槽位数量
- 移除冗余的releaseScript，直接使用DEL命令
- Wait队列TTL只在首次创建时设置，避免刷新

											
										
										
											2025-12-24 21:00:29 +08:00
+										if count < maxConcurrency then
-												perf(后端): 完成性能优化与连接池配置

新增 DB/Redis 连接池配置与校验，并补充单测

网关请求体大小限制与 413 处理

HTTP/req 客户端池化并调整上游连接池默认值

并发槽位改为 ZSET+Lua 与指数退避

用量统计改 SQL 聚合并新增索引迁移

计费缓存写入改工作池并补测试/基准

测试: 在 backend/ 下运行 go test ./...

											
										
										
											2025-12-31 08:50:12 +08:00
+											redis.call('ZADD', key, now, requestID)
 											redis.call('EXPIRE', key, ttl)
-												refactor(backend): 添加 service 缓存端口

											
										
										
											2025-12-19 23:39:28 +08:00
+											return 1
 										end
-												fix(concurrency): 重构并发管理使用独立Key+原生TTL

问题：旧方案使用计数器模式，每次acquire都刷新TTL，导致僵尸数据永不过期

解决方案：
- 每个槽位使用独立Redis Key: concurrency:account:{id}:{requestID}
- 利用Redis原生TTL，每个槽位独立5分钟过期
- 服务崩溃后僵尸数据自动清理，无需手动干预
- 兼容多实例K8s部署

技术改动：
- 新增SCAN脚本统计活跃槽位数量
- 移除冗余的releaseScript，直接使用DEL命令
- Wait队列TTL只在首次创建时设置，避免刷新

											
										
										
											2025-12-24 21:00:29 +08:00
-												refactor(backend): 添加 service 缓存端口

											
										
										
											2025-12-19 23:39:28 +08:00
+										return 0
 									`)
-												perf(后端): 完成性能优化与连接池配置

新增 DB/Redis 连接池配置与校验，并补充单测

网关请求体大小限制与 413 处理

HTTP/req 客户端池化并调整上游连接池默认值

并发槽位改为 ZSET+Lua 与指数退避

用量统计改 SQL 聚合并新增索引迁移

计费缓存写入改工作池并补测试/基准

测试: 在 backend/ 下运行 go test ./...

											
										
										
											2025-12-31 08:50:12 +08:00
+									// getCountScript 统计有序集合中的槽位数量并清理过期条目
 									// 使用 Redis TIME 命令获取服务器时间
 									// KEYS[1] = 有序集合键
 									// ARGV[1] = TTL（秒）
-												fix(concurrency): 重构并发管理使用独立Key+原生TTL

问题：旧方案使用计数器模式，每次acquire都刷新TTL，导致僵尸数据永不过期

解决方案：
- 每个槽位使用独立Redis Key: concurrency:account:{id}:{requestID}
- 利用Redis原生TTL，每个槽位独立5分钟过期
- 服务崩溃后僵尸数据自动清理，无需手动干预
- 兼容多实例K8s部署

技术改动：
- 新增SCAN脚本统计活跃槽位数量
- 移除冗余的releaseScript，直接使用DEL命令
- Wait队列TTL只在首次创建时设置，避免刷新

											
										
										
											2025-12-24 21:00:29 +08:00
+									getCountScript = redis.NewScript(`
-												perf(后端): 完成性能优化与连接池配置

新增 DB/Redis 连接池配置与校验，并补充单测

网关请求体大小限制与 413 处理

HTTP/req 客户端池化并调整上游连接池默认值

并发槽位改为 ZSET+Lua 与指数退避

用量统计改 SQL 聚合并新增索引迁移

计费缓存写入改工作池并补测试/基准

测试: 在 backend/ 下运行 go test ./...

											
										
										
											2025-12-31 08:50:12 +08:00
+										local key = KEYS[1]
 										local ttl = tonumber(ARGV[1])
 										-- 使用 Redis 服务器时间
 										local timeResult = redis.call('TIME')
 										local now = tonumber(timeResult[1])
 										local expireBefore = now - ttl
 										redis.call('ZREMRANGEBYSCORE', key, '-inf', expireBefore)
 										return redis.call('ZCARD', key)
-												refactor(backend): 添加 service 缓存端口

											
										
										
											2025-12-19 23:39:28 +08:00
+									`)
-												feat(repository): 实现运维监控数据访问层

- 新增 ops 主仓库（ops_repo.go）
- 实现告警数据访问（ops_repo_alerts.go）
- 实现仪表板数据访问（ops_repo_dashboard.go）
- 实现直方图数据访问（ops_repo_histograms.go）
- 实现延迟直方图桶逻辑（ops_repo_latency_histogram_buckets.go）
- 新增延迟直方图桶测试（ops_repo_latency_histogram_buckets_test.go）
- 实现指标数据访问（ops_repo_metrics.go）
- 实现预聚合数据访问（ops_repo_preagg.go）
- 实现请求详情数据访问（ops_repo_request_details.go）
- 实现趋势数据访问（ops_repo_trends.go）
- 实现窗口统计数据访问（ops_repo_window_stats.go）
- 更新并发缓存支持 ops 场景
- 注册 repository 依赖注入

											
										
										
											2026-01-09 20:52:57 +08:00
+									// incrementWaitScript - refreshes TTL on each increment to keep queue depth accurate
-												perf(gateway): 优化负载感知调度

主要改进：
- 优化负载感知调度的准确性和响应速度
- 将 AccountUsageService 的包级缓存改为依赖注入
- 修复 SSE/JSON 转义和 nil 安全问题
- 恢复 Google One 功能兼容性

											
										
										
											2026-01-03 06:32:51 -08:00
+									// KEYS[1] = wait queue key
 									// ARGV[1] = maxWait
 									// ARGV[2] = TTL in seconds
-												refactor(backend): 添加 service 缓存端口

											
										
										
											2025-12-19 23:39:28 +08:00
+									incrementWaitScript = redis.NewScript(`
-												perf(gateway): 优化负载感知调度

主要改进：
- 优化负载感知调度的准确性和响应速度
- 将 AccountUsageService 的包级缓存改为依赖注入
- 修复 SSE/JSON 转义和 nil 安全问题
- 恢复 Google One 功能兼容性

											
										
										
											2026-01-03 06:32:51 -08:00
+										local current = redis.call('GET', KEYS[1])
 										if current == false then
 											current = 0
 										else
 											current = tonumber(current)
-												refactor(backend): 添加 service 缓存端口

											
										
										
											2025-12-19 23:39:28 +08:00
+										end
-												fix(concurrency): 重构并发管理使用独立Key+原生TTL

问题：旧方案使用计数器模式，每次acquire都刷新TTL，导致僵尸数据永不过期

解决方案：
- 每个槽位使用独立Redis Key: concurrency:account:{id}:{requestID}
- 利用Redis原生TTL，每个槽位独立5分钟过期
- 服务崩溃后僵尸数据自动清理，无需手动干预
- 兼容多实例K8s部署

技术改动：
- 新增SCAN脚本统计活跃槽位数量
- 移除冗余的releaseScript，直接使用DEL命令
- Wait队列TTL只在首次创建时设置，避免刷新

											
										
										
											2025-12-24 21:00:29 +08:00
-												perf(gateway): 优化负载感知调度

主要改进：
- 优化负载感知调度的准确性和响应速度
- 将 AccountUsageService 的包级缓存改为依赖注入
- 修复 SSE/JSON 转义和 nil 安全问题
- 恢复 Google One 功能兼容性

											
										
										
											2026-01-03 06:32:51 -08:00
+										if current >= tonumber(ARGV[1]) then
-												refactor(backend): 添加 service 缓存端口

											
										
										
											2025-12-19 23:39:28 +08:00
+											return 0
 										end
-												fix(concurrency): 重构并发管理使用独立Key+原生TTL

问题：旧方案使用计数器模式，每次acquire都刷新TTL，导致僵尸数据永不过期

解决方案：
- 每个槽位使用独立Redis Key: concurrency:account:{id}:{requestID}
- 利用Redis原生TTL，每个槽位独立5分钟过期
- 服务崩溃后僵尸数据自动清理，无需手动干预
- 兼容多实例K8s部署

技术改动：
- 新增SCAN脚本统计活跃槽位数量
- 移除冗余的releaseScript，直接使用DEL命令
- Wait队列TTL只在首次创建时设置，避免刷新

											
										
										
											2025-12-24 21:00:29 +08:00
-												perf(gateway): 优化负载感知调度

主要改进：
- 优化负载感知调度的准确性和响应速度
- 将 AccountUsageService 的包级缓存改为依赖注入
- 修复 SSE/JSON 转义和 nil 安全问题
- 恢复 Google One 功能兼容性

											
										
										
											2026-01-03 06:32:51 -08:00
+										local newVal = redis.call('INCR', KEYS[1])
-												fix(concurrency): 重构并发管理使用独立Key+原生TTL

问题：旧方案使用计数器模式，每次acquire都刷新TTL，导致僵尸数据永不过期

解决方案：
- 每个槽位使用独立Redis Key: concurrency:account:{id}:{requestID}
- 利用Redis原生TTL，每个槽位独立5分钟过期
- 服务崩溃后僵尸数据自动清理，无需手动干预
- 兼容多实例K8s部署

技术改动：
- 新增SCAN脚本统计活跃槽位数量
- 移除冗余的releaseScript，直接使用DEL命令
- Wait队列TTL只在首次创建时设置，避免刷新

											
										
										
											2025-12-24 21:00:29 +08:00
-												feat(repository): 实现运维监控数据访问层

- 新增 ops 主仓库（ops_repo.go）
- 实现告警数据访问（ops_repo_alerts.go）
- 实现仪表板数据访问（ops_repo_dashboard.go）
- 实现直方图数据访问（ops_repo_histograms.go）
- 实现延迟直方图桶逻辑（ops_repo_latency_histogram_buckets.go）
- 新增延迟直方图桶测试（ops_repo_latency_histogram_buckets_test.go）
- 实现指标数据访问（ops_repo_metrics.go）
- 实现预聚合数据访问（ops_repo_preagg.go）
- 实现请求详情数据访问（ops_repo_request_details.go）
- 实现趋势数据访问（ops_repo_trends.go）
- 实现窗口统计数据访问（ops_repo_window_stats.go）
- 更新并发缓存支持 ops 场景
- 注册 repository 依赖注入

											
										
										
											2026-01-09 20:52:57 +08:00
+										-- Refresh TTL so long-running traffic doesn't expire active queue counters.
 										redis.call('EXPIRE', KEYS[1], ARGV[2])
-												fix(concurrency): 重构并发管理使用独立Key+原生TTL

问题：旧方案使用计数器模式，每次acquire都刷新TTL，导致僵尸数据永不过期

解决方案：
- 每个槽位使用独立Redis Key: concurrency:account:{id}:{requestID}
- 利用Redis原生TTL，每个槽位独立5分钟过期
- 服务崩溃后僵尸数据自动清理，无需手动干预
- 兼容多实例K8s部署

技术改动：
- 新增SCAN脚本统计活跃槽位数量
- 移除冗余的releaseScript，直接使用DEL命令
- Wait队列TTL只在首次创建时设置，避免刷新

											
										
										
											2025-12-24 21:00:29 +08:00
-												feat(gateway): 实现负载感知的账号调度优化

- 新增调度配置：粘性会话排队、兜底排队、负载计算、槽位清理
- 实现账号级等待队列和批量负载查询（Redis Lua 脚本）
- 三层选择策略：粘性会话优先 → 负载感知选择 → 兜底排队
- 后台定期清理过期槽位，防止资源泄漏
- 集成到所有网关处理器（Claude/Gemini/OpenAI）

											
										
										
											2026-01-01 04:01:51 +08:00
+											return 1
 										`)
-												feat(repository): 实现运维监控数据访问层

- 新增 ops 主仓库（ops_repo.go）
- 实现告警数据访问（ops_repo_alerts.go）
- 实现仪表板数据访问（ops_repo_dashboard.go）
- 实现直方图数据访问（ops_repo_histograms.go）
- 实现延迟直方图桶逻辑（ops_repo_latency_histogram_buckets.go）
- 新增延迟直方图桶测试（ops_repo_latency_histogram_buckets_test.go）
- 实现指标数据访问（ops_repo_metrics.go）
- 实现预聚合数据访问（ops_repo_preagg.go）
- 实现请求详情数据访问（ops_repo_request_details.go）
- 实现趋势数据访问（ops_repo_trends.go）
- 实现窗口统计数据访问（ops_repo_window_stats.go）
- 更新并发缓存支持 ops 场景
- 注册 repository 依赖注入

											
										
										
											2026-01-09 20:52:57 +08:00
+									// incrementAccountWaitScript - account-level wait queue count (refresh TTL on each increment)
-												feat(gateway): 实现负载感知的账号调度优化

- 新增调度配置：粘性会话排队、兜底排队、负载计算、槽位清理
- 实现账号级等待队列和批量负载查询（Redis Lua 脚本）
- 三层选择策略：粘性会话优先 → 负载感知选择 → 兜底排队
- 后台定期清理过期槽位，防止资源泄漏
- 集成到所有网关处理器（Claude/Gemini/OpenAI）

											
										
										
											2026-01-01 04:01:51 +08:00
+									incrementAccountWaitScript = redis.NewScript(`
 											local current = redis.call('GET', KEYS[1])
 											if current == false then
 												current = 0
 											else
 												current = tonumber(current)
 											end
 											if current >= tonumber(ARGV[1]) then
 												return 0
 											end
 											local newVal = redis.call('INCR', KEYS[1])
-												feat(repository): 实现运维监控数据访问层

- 新增 ops 主仓库（ops_repo.go）
- 实现告警数据访问（ops_repo_alerts.go）
- 实现仪表板数据访问（ops_repo_dashboard.go）
- 实现直方图数据访问（ops_repo_histograms.go）
- 实现延迟直方图桶逻辑（ops_repo_latency_histogram_buckets.go）
- 新增延迟直方图桶测试（ops_repo_latency_histogram_buckets_test.go）
- 实现指标数据访问（ops_repo_metrics.go）
- 实现预聚合数据访问（ops_repo_preagg.go）
- 实现请求详情数据访问（ops_repo_request_details.go）
- 实现趋势数据访问（ops_repo_trends.go）
- 实现窗口统计数据访问（ops_repo_window_stats.go）
- 更新并发缓存支持 ops 场景
- 注册 repository 依赖注入

											
										
										
											2026-01-09 20:52:57 +08:00
+											-- Refresh TTL so long-running traffic doesn't expire active queue counters.
 											redis.call('EXPIRE', KEYS[1], ARGV[2])
-												feat(gateway): 实现负载感知的账号调度优化

- 新增调度配置：粘性会话排队、兜底排队、负载计算、槽位清理
- 实现账号级等待队列和批量负载查询（Redis Lua 脚本）
- 三层选择策略：粘性会话优先 → 负载感知选择 → 兜底排队
- 后台定期清理过期槽位，防止资源泄漏
- 集成到所有网关处理器（Claude/Gemini/OpenAI）

											
										
										
											2026-01-01 04:01:51 +08:00
 											return 1
 										`)
-												refactor(backend): 添加 service 缓存端口

											
										
										
											2025-12-19 23:39:28 +08:00
-												fix(concurrency): 重构并发管理使用独立Key+原生TTL

问题：旧方案使用计数器模式，每次acquire都刷新TTL，导致僵尸数据永不过期

解决方案：
- 每个槽位使用独立Redis Key: concurrency:account:{id}:{requestID}
- 利用Redis原生TTL，每个槽位独立5分钟过期
- 服务崩溃后僵尸数据自动清理，无需手动干预
- 兼容多实例K8s部署

技术改动：
- 新增SCAN脚本统计活跃槽位数量
- 移除冗余的releaseScript，直接使用DEL命令
- Wait队列TTL只在首次创建时设置，避免刷新

											
										
										
											2025-12-24 21:00:29 +08:00
	// decrementWaitScript decrements the counter stored at KEYS[1] only when the
	// key exists and its value is greater than zero, so the counter is never
	// pushed below zero. It takes no ARGV and always returns 1.
	decrementWaitScript = redis.NewScript(`
		local current = redis.call('GET', KEYS[1])
		if current ~= false and tonumber(current) > 0 then
			redis.call('DECR', KEYS[1])
		end
		return 1
	`)
-												feat: cleanup stale concurrency slots on startup

When the service restarts, concurrency slots from the old process
remain in Redis, causing phantom occupancy. On startup, scan all
concurrency sorted sets and remove members with non-current process
prefix, then clear orphaned wait queue counters.

Uses Go-side SCAN to discover keys (compatible with Redis client
prefix hooks in tests), then passes them to a Lua script for
atomic member-level cleanup.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

											
										
										
											2026-03-09 19:55:18 +08:00
	// cleanupExpiredSlotsScript removes expired slots from the sorted set of a
	// single account or user.
	// KEYS[1] = sorted-set key
	// ARGV[1] = TTL in seconds
	// Members scored at or before (Redis server TIME - TTL) are removed. The key
	// is deleted when no members remain; otherwise its EXPIRE is refreshed to
	// the TTL. The script always returns 1.
	cleanupExpiredSlotsScript = redis.NewScript(`
		local key = KEYS[1]
		local ttl = tonumber(ARGV[1])
		local timeResult = redis.call('TIME')
		local now = tonumber(timeResult[1])
		local expireBefore = now - ttl
		redis.call('ZREMRANGEBYSCORE', key, '-inf', expireBefore)
		if redis.call('ZCARD', key) == 0 then
			redis.call('DEL', key)
		else
			redis.call('EXPIRE', key, ttl)
		end
		return 1
	`)
 									// startupCleanupScript removes slot members whose prefix does not match the current process.
 									// KEYS is the list of sorted-set keys, ARGV[1] is the current process prefix, ARGV[2] is the slot TTL.
 									// For each KEYS[i] it removes members with a non-matching prefix, deletes the key once it is
 									// empty, and otherwise refreshes its EXPIRE. It returns the total number of members removed.
 									startupCleanupScript = redis.NewScript(`
 										local activePrefix = ARGV[1]
 										local slotTTL = tonumber(ARGV[2])
 										local removed = 0
 										for i = 1, #KEYS do
 											local key = KEYS[i]
 											local members = redis.call('ZRANGE', key, 0, -1)
 											for _, member in ipairs(members) do
 												if string.sub(member, 1, string.len(activePrefix)) ~= activePrefix then
 													removed = removed + redis.call('ZREM', key, member)
 												end
 											end
 											if redis.call('ZCARD', key) == 0 then
 												redis.call('DEL', key)
 											else
 												redis.call('EXPIRE', key, slotTTL)
 											end
 										end
 										return removed
 									`)
-												refactor(backend): 添加 service 缓存端口

											
										
										
											2025-12-19 23:39:28 +08:00
+								)
 								type concurrencyCache struct {
-												feat(gateway): 实现负载感知的账号调度优化

- 新增调度配置：粘性会话排队、兜底排队、负载计算、槽位清理
- 实现账号级等待队列和批量负载查询（Redis Lua 脚本）
- 三层选择策略：粘性会话优先 → 负载感知选择 → 兜底排队
- 后台定期清理过期槽位，防止资源泄漏
- 集成到所有网关处理器（Claude/Gemini/OpenAI）

											
										
										
											2026-01-01 04:01:51 +08:00
+									rdb                 *redis.Client
 									slotTTLSeconds      int // 槽位过期时间（秒）
 									waitQueueTTLSeconds int // 等待队列过期时间（秒）
-												refactor(backend): 添加 service 缓存端口

											
										
										
											2025-12-19 23:39:28 +08:00
+								}
-												perf(后端): 完成性能优化与连接池配置

新增 DB/Redis 连接池配置与校验，并补充单测

网关请求体大小限制与 413 处理

HTTP/req 客户端池化并调整上游连接池默认值

并发槽位改为 ZSET+Lua 与指数退避

用量统计改 SQL 聚合并新增索引迁移

计费缓存写入改工作池并补测试/基准

测试: 在 backend/ 下运行 go test ./...

											
										
										
											2025-12-31 08:50:12 +08:00
+								// NewConcurrencyCache 创建并发控制缓存
 								// slotTTLMinutes: 槽位过期时间（分钟），0 或负数使用默认值 15 分钟
-												feat(gateway): 实现负载感知的账号调度优化

- 新增调度配置：粘性会话排队、兜底排队、负载计算、槽位清理
- 实现账号级等待队列和批量负载查询（Redis Lua 脚本）
- 三层选择策略：粘性会话优先 → 负载感知选择 → 兜底排队
- 后台定期清理过期槽位，防止资源泄漏
- 集成到所有网关处理器（Claude/Gemini/OpenAI）

											
										
										
											2026-01-01 04:01:51 +08:00
+								// waitQueueTTLSeconds: 等待队列过期时间（秒），0 或负数使用 slot TTL
 								func NewConcurrencyCache(rdb *redis.Client, slotTTLMinutes int, waitQueueTTLSeconds int) service.ConcurrencyCache {
-												perf(后端): 完成性能优化与连接池配置

新增 DB/Redis 连接池配置与校验，并补充单测

网关请求体大小限制与 413 处理

HTTP/req 客户端池化并调整上游连接池默认值

并发槽位改为 ZSET+Lua 与指数退避

用量统计改 SQL 聚合并新增索引迁移

计费缓存写入改工作池并补测试/基准

测试: 在 backend/ 下运行 go test ./...

											
										
										
											2025-12-31 08:50:12 +08:00
+									if slotTTLMinutes <= 0 {
 										slotTTLMinutes = defaultSlotTTLMinutes
 									}
-												feat(gateway): 实现负载感知的账号调度优化

- 新增调度配置：粘性会话排队、兜底排队、负载计算、槽位清理
- 实现账号级等待队列和批量负载查询（Redis Lua 脚本）
- 三层选择策略：粘性会话优先 → 负载感知选择 → 兜底排队
- 后台定期清理过期槽位，防止资源泄漏
- 集成到所有网关处理器（Claude/Gemini/OpenAI）

											
										
										
											2026-01-01 04:01:51 +08:00
+									if waitQueueTTLSeconds <= 0 {
 										waitQueueTTLSeconds = slotTTLMinutes * 60
 									}
-												perf(后端): 完成性能优化与连接池配置

新增 DB/Redis 连接池配置与校验，并补充单测

网关请求体大小限制与 413 处理

HTTP/req 客户端池化并调整上游连接池默认值

并发槽位改为 ZSET+Lua 与指数退避

用量统计改 SQL 聚合并新增索引迁移

计费缓存写入改工作池并补测试/基准

测试: 在 backend/ 下运行 go test ./...

											
										
										
											2025-12-31 08:50:12 +08:00
+									return &concurrencyCache{
-												feat(gateway): 实现负载感知的账号调度优化

- 新增调度配置：粘性会话排队、兜底排队、负载计算、槽位清理
- 实现账号级等待队列和批量负载查询（Redis Lua 脚本）
- 三层选择策略：粘性会话优先 → 负载感知选择 → 兜底排队
- 后台定期清理过期槽位，防止资源泄漏
- 集成到所有网关处理器（Claude/Gemini/OpenAI）

											
										
										
											2026-01-01 04:01:51 +08:00
+										rdb:                 rdb,
 										slotTTLSeconds:      slotTTLMinutes * 60,
 										waitQueueTTLSeconds: waitQueueTTLSeconds,
-												perf(后端): 完成性能优化与连接池配置

新增 DB/Redis 连接池配置与校验，并补充单测

网关请求体大小限制与 413 处理

HTTP/req 客户端池化并调整上游连接池默认值

并发槽位改为 ZSET+Lua 与指数退避

用量统计改 SQL 聚合并新增索引迁移

计费缓存写入改工作池并补测试/基准

测试: 在 backend/ 下运行 go test ./...

											
										
										
											2025-12-31 08:50:12 +08:00
+									}
-												refactor(backend): 添加 service 缓存端口

											
										
										
											2025-12-19 23:39:28 +08:00
+								}
-												fix(concurrency): 重构并发管理使用独立Key+原生TTL

问题：旧方案使用计数器模式，每次acquire都刷新TTL，导致僵尸数据永不过期

解决方案：
- 每个槽位使用独立Redis Key: concurrency:account:{id}:{requestID}
- 利用Redis原生TTL，每个槽位独立5分钟过期
- 服务崩溃后僵尸数据自动清理，无需手动干预
- 兼容多实例K8s部署

技术改动：
- 新增SCAN脚本统计活跃槽位数量
- 移除冗余的releaseScript，直接使用DEL命令
- Wait队列TTL只在首次创建时设置，避免刷新

											
										
										
											2025-12-24 21:00:29 +08:00
+								// Helper functions for key generation
-												perf(后端): 完成性能优化与连接池配置

新增 DB/Redis 连接池配置与校验，并补充单测

网关请求体大小限制与 413 处理

HTTP/req 客户端池化并调整上游连接池默认值

并发槽位改为 ZSET+Lua 与指数退避

用量统计改 SQL 聚合并新增索引迁移

计费缓存写入改工作池并补测试/基准

测试: 在 backend/ 下运行 go test ./...

											
										
										
											2025-12-31 08:50:12 +08:00
+								func accountSlotKey(accountID int64) string {
 									return fmt.Sprintf("%s%d", accountSlotKeyPrefix, accountID)
-												fix(concurrency): 重构并发管理使用独立Key+原生TTL

问题：旧方案使用计数器模式，每次acquire都刷新TTL，导致僵尸数据永不过期

解决方案：
- 每个槽位使用独立Redis Key: concurrency:account:{id}:{requestID}
- 利用Redis原生TTL，每个槽位独立5分钟过期
- 服务崩溃后僵尸数据自动清理，无需手动干预
- 兼容多实例K8s部署

技术改动：
- 新增SCAN脚本统计活跃槽位数量
- 移除冗余的releaseScript，直接使用DEL命令
- Wait队列TTL只在首次创建时设置，避免刷新

											
										
										
											2025-12-24 21:00:29 +08:00
+								}
-												perf(后端): 完成性能优化与连接池配置

新增 DB/Redis 连接池配置与校验，并补充单测

网关请求体大小限制与 413 处理

HTTP/req 客户端池化并调整上游连接池默认值

并发槽位改为 ZSET+Lua 与指数退避

用量统计改 SQL 聚合并新增索引迁移

计费缓存写入改工作池并补测试/基准

测试: 在 backend/ 下运行 go test ./...

											
										
										
											2025-12-31 08:50:12 +08:00
+								func userSlotKey(userID int64) string {
 									return fmt.Sprintf("%s%d", userSlotKeyPrefix, userID)
-												fix(concurrency): 重构并发管理使用独立Key+原生TTL

问题：旧方案使用计数器模式，每次acquire都刷新TTL，导致僵尸数据永不过期

解决方案：
- 每个槽位使用独立Redis Key: concurrency:account:{id}:{requestID}
- 利用Redis原生TTL，每个槽位独立5分钟过期
- 服务崩溃后僵尸数据自动清理，无需手动干预
- 兼容多实例K8s部署

技术改动：
- 新增SCAN脚本统计活跃槽位数量
- 移除冗余的releaseScript，直接使用DEL命令
- Wait队列TTL只在首次创建时设置，避免刷新

											
										
										
											2025-12-24 21:00:29 +08:00
+								}
 								func waitQueueKey(userID int64) string {
-												perf(gateway): 优化负载感知调度

主要改进：
- 优化负载感知调度的准确性和响应速度
- 将 AccountUsageService 的包级缓存改为依赖注入
- 修复 SSE/JSON 转义和 nil 安全问题
- 恢复 Google One 功能兼容性

											
										
										
											2026-01-03 06:32:51 -08:00
+									return fmt.Sprintf("%s%d", waitQueueKeyPrefix, userID)
-												fix(concurrency): 重构并发管理使用独立Key+原生TTL

问题：旧方案使用计数器模式，每次acquire都刷新TTL，导致僵尸数据永不过期

解决方案：
- 每个槽位使用独立Redis Key: concurrency:account:{id}:{requestID}
- 利用Redis原生TTL，每个槽位独立5分钟过期
- 服务崩溃后僵尸数据自动清理，无需手动干预
- 兼容多实例K8s部署

技术改动：
- 新增SCAN脚本统计活跃槽位数量
- 移除冗余的releaseScript，直接使用DEL命令
- Wait队列TTL只在首次创建时设置，避免刷新

											
										
										
											2025-12-24 21:00:29 +08:00
+								}
-												feat(gateway): 实现负载感知的账号调度优化

- 新增调度配置：粘性会话排队、兜底排队、负载计算、槽位清理
- 实现账号级等待队列和批量负载查询（Redis Lua 脚本）
- 三层选择策略：粘性会话优先 → 负载感知选择 → 兜底排队
- 后台定期清理过期槽位，防止资源泄漏
- 集成到所有网关处理器（Claude/Gemini/OpenAI）

											
										
										
											2026-01-01 04:01:51 +08:00
+								func accountWaitKey(accountID int64) string {
 									return fmt.Sprintf("%s%d", accountWaitKeyPrefix, accountID)
 								}
-												fix(concurrency): 重构并发管理使用独立Key+原生TTL

问题：旧方案使用计数器模式，每次acquire都刷新TTL，导致僵尸数据永不过期

解决方案：
- 每个槽位使用独立Redis Key: concurrency:account:{id}:{requestID}
- 利用Redis原生TTL，每个槽位独立5分钟过期
- 服务崩溃后僵尸数据自动清理，无需手动干预
- 兼容多实例K8s部署

技术改动：
- 新增SCAN脚本统计活跃槽位数量
- 移除冗余的releaseScript，直接使用DEL命令
- Wait队列TTL只在首次创建时设置，避免刷新

											
										
										
											2025-12-24 21:00:29 +08:00
+								// Account slot operations
 								func (c *concurrencyCache) AcquireAccountSlot(ctx context.Context, accountID int64, maxConcurrency int, requestID string) (bool, error) {
-												perf(后端): 完成性能优化与连接池配置

新增 DB/Redis 连接池配置与校验，并补充单测

网关请求体大小限制与 413 处理

HTTP/req 客户端池化并调整上游连接池默认值

并发槽位改为 ZSET+Lua 与指数退避

用量统计改 SQL 聚合并新增索引迁移

计费缓存写入改工作池并补测试/基准

测试: 在 backend/ 下运行 go test ./...

											
										
										
											2025-12-31 08:50:12 +08:00
+									key := accountSlotKey(accountID)
 									// 时间戳在 Lua 脚本内使用 Redis TIME 命令获取，确保多实例时钟一致
 									result, err := acquireScript.Run(ctx, c.rdb, []string{key}, maxConcurrency, c.slotTTLSeconds, requestID).Int()
-												refactor(backend): 添加 service 缓存端口

											
										
										
											2025-12-19 23:39:28 +08:00
+									if err != nil {
 										return false, err
 									}
 									return result == 1, nil
 								}
-												fix(concurrency): 重构并发管理使用独立Key+原生TTL

问题：旧方案使用计数器模式，每次acquire都刷新TTL，导致僵尸数据永不过期

解决方案：
- 每个槽位使用独立Redis Key: concurrency:account:{id}:{requestID}
- 利用Redis原生TTL，每个槽位独立5分钟过期
- 服务崩溃后僵尸数据自动清理，无需手动干预
- 兼容多实例K8s部署

技术改动：
- 新增SCAN脚本统计活跃槽位数量
- 移除冗余的releaseScript，直接使用DEL命令
- Wait队列TTL只在首次创建时设置，避免刷新

											
										
										
											2025-12-24 21:00:29 +08:00
+								func (c *concurrencyCache) ReleaseAccountSlot(ctx context.Context, accountID int64, requestID string) error {
-												perf(后端): 完成性能优化与连接池配置

新增 DB/Redis 连接池配置与校验，并补充单测

网关请求体大小限制与 413 处理

HTTP/req 客户端池化并调整上游连接池默认值

并发槽位改为 ZSET+Lua 与指数退避

用量统计改 SQL 聚合并新增索引迁移

计费缓存写入改工作池并补测试/基准

测试: 在 backend/ 下运行 go test ./...

											
										
										
											2025-12-31 08:50:12 +08:00
+									key := accountSlotKey(accountID)
 									return c.rdb.ZRem(ctx, key, requestID).Err()
-												refactor(backend): 添加 service 缓存端口

											
										
										
											2025-12-19 23:39:28 +08:00
+								}
 								func (c *concurrencyCache) GetAccountConcurrency(ctx context.Context, accountID int64) (int, error) {
-												perf(后端): 完成性能优化与连接池配置

新增 DB/Redis 连接池配置与校验，并补充单测

网关请求体大小限制与 413 处理

HTTP/req 客户端池化并调整上游连接池默认值

并发槽位改为 ZSET+Lua 与指数退避

用量统计改 SQL 聚合并新增索引迁移

计费缓存写入改工作池并补测试/基准

测试: 在 backend/ 下运行 go test ./...

											
										
										
											2025-12-31 08:50:12 +08:00
+									key := accountSlotKey(accountID)
 									// 时间戳在 Lua 脚本内使用 Redis TIME 命令获取
 									result, err := getCountScript.Run(ctx, c.rdb, []string{key}, c.slotTTLSeconds).Int()
-												fix(concurrency): 重构并发管理使用独立Key+原生TTL

问题：旧方案使用计数器模式，每次acquire都刷新TTL，导致僵尸数据永不过期

解决方案：
- 每个槽位使用独立Redis Key: concurrency:account:{id}:{requestID}
- 利用Redis原生TTL，每个槽位独立5分钟过期
- 服务崩溃后僵尸数据自动清理，无需手动干预
- 兼容多实例K8s部署

技术改动：
- 新增SCAN脚本统计活跃槽位数量
- 移除冗余的releaseScript，直接使用DEL命令
- Wait队列TTL只在首次创建时设置，避免刷新

											
										
										
											2025-12-24 21:00:29 +08:00
+									if err != nil {
 										return 0, err
 									}
 									return result, nil
-												refactor(backend): 添加 service 缓存端口

											
										
										
											2025-12-19 23:39:28 +08:00
+								}
-												feat(sync): full code sync from release

											
										
										
											2026-02-28 15:01:20 +08:00
+								func (c *concurrencyCache) GetAccountConcurrencyBatch(ctx context.Context, accountIDs []int64) (map[int64]int, error) {
 									if len(accountIDs) == 0 {
 										return map[int64]int{}, nil
 									}
 									now, err := c.rdb.Time(ctx).Result()
 									if err != nil {
 										return nil, fmt.Errorf("redis TIME: %w", err)
 									}
 									cutoffTime := now.Unix() - int64(c.slotTTLSeconds)
 									pipe := c.rdb.Pipeline()
 									type accountCmd struct {
 										accountID int64
 										zcardCmd  *redis.IntCmd
 									}
 									cmds := make([]accountCmd, 0, len(accountIDs))
 									for _, accountID := range accountIDs {
 										slotKey := accountSlotKeyPrefix + strconv.FormatInt(accountID, 10)
 										pipe.ZRemRangeByScore(ctx, slotKey, "-inf", strconv.FormatInt(cutoffTime, 10))
 										cmds = append(cmds, accountCmd{
 											accountID: accountID,
 											zcardCmd:  pipe.ZCard(ctx, slotKey),
 										})
 									}
 									if _, err := pipe.Exec(ctx); err != nil && !errors.Is(err, redis.Nil) {
 										return nil, fmt.Errorf("pipeline exec: %w", err)
 									}
 									result := make(map[int64]int, len(accountIDs))
 									for _, cmd := range cmds {
 										result[cmd.accountID] = int(cmd.zcardCmd.Val())
 									}
 									return result, nil
 								}
-												fix(concurrency): 重构并发管理使用独立Key+原生TTL

问题：旧方案使用计数器模式，每次acquire都刷新TTL，导致僵尸数据永不过期

解决方案：
- 每个槽位使用独立Redis Key: concurrency:account:{id}:{requestID}
- 利用Redis原生TTL，每个槽位独立5分钟过期
- 服务崩溃后僵尸数据自动清理，无需手动干预
- 兼容多实例K8s部署

技术改动：
- 新增SCAN脚本统计活跃槽位数量
- 移除冗余的releaseScript，直接使用DEL命令
- Wait队列TTL只在首次创建时设置，避免刷新

											
										
										
											2025-12-24 21:00:29 +08:00
+								// User slot operations
 								func (c *concurrencyCache) AcquireUserSlot(ctx context.Context, userID int64, maxConcurrency int, requestID string) (bool, error) {
-												perf(后端): 完成性能优化与连接池配置

新增 DB/Redis 连接池配置与校验，并补充单测

网关请求体大小限制与 413 处理

HTTP/req 客户端池化并调整上游连接池默认值

并发槽位改为 ZSET+Lua 与指数退避

用量统计改 SQL 聚合并新增索引迁移

计费缓存写入改工作池并补测试/基准

测试: 在 backend/ 下运行 go test ./...

											
										
										
											2025-12-31 08:50:12 +08:00
+									key := userSlotKey(userID)
 									// 时间戳在 Lua 脚本内使用 Redis TIME 命令获取，确保多实例时钟一致
 									result, err := acquireScript.Run(ctx, c.rdb, []string{key}, maxConcurrency, c.slotTTLSeconds, requestID).Int()
-												refactor(backend): 添加 service 缓存端口

											
										
										
											2025-12-19 23:39:28 +08:00
+									if err != nil {
 										return false, err
 									}
 									return result == 1, nil
 								}
-												fix(concurrency): 重构并发管理使用独立Key+原生TTL

问题：旧方案使用计数器模式，每次acquire都刷新TTL，导致僵尸数据永不过期

解决方案：
- 每个槽位使用独立Redis Key: concurrency:account:{id}:{requestID}
- 利用Redis原生TTL，每个槽位独立5分钟过期
- 服务崩溃后僵尸数据自动清理，无需手动干预
- 兼容多实例K8s部署

技术改动：
- 新增SCAN脚本统计活跃槽位数量
- 移除冗余的releaseScript，直接使用DEL命令
- Wait队列TTL只在首次创建时设置，避免刷新

											
										
										
											2025-12-24 21:00:29 +08:00
+								func (c *concurrencyCache) ReleaseUserSlot(ctx context.Context, userID int64, requestID string) error {
-												perf(后端): 完成性能优化与连接池配置

新增 DB/Redis 连接池配置与校验，并补充单测

网关请求体大小限制与 413 处理

HTTP/req 客户端池化并调整上游连接池默认值

并发槽位改为 ZSET+Lua 与指数退避

用量统计改 SQL 聚合并新增索引迁移

计费缓存写入改工作池并补测试/基准

测试: 在 backend/ 下运行 go test ./...

											
										
										
											2025-12-31 08:50:12 +08:00
+									key := userSlotKey(userID)
 									return c.rdb.ZRem(ctx, key, requestID).Err()
-												refactor(backend): 添加 service 缓存端口

											
										
										
											2025-12-19 23:39:28 +08:00
+								}
 								func (c *concurrencyCache) GetUserConcurrency(ctx context.Context, userID int64) (int, error) {
-												perf(后端): 完成性能优化与连接池配置

新增 DB/Redis 连接池配置与校验，并补充单测

网关请求体大小限制与 413 处理

HTTP/req 客户端池化并调整上游连接池默认值

并发槽位改为 ZSET+Lua 与指数退避

用量统计改 SQL 聚合并新增索引迁移

计费缓存写入改工作池并补测试/基准

测试: 在 backend/ 下运行 go test ./...

											
										
										
											2025-12-31 08:50:12 +08:00
+									key := userSlotKey(userID)
 									// 时间戳在 Lua 脚本内使用 Redis TIME 命令获取
 									result, err := getCountScript.Run(ctx, c.rdb, []string{key}, c.slotTTLSeconds).Int()
-												fix(concurrency): 重构并发管理使用独立Key+原生TTL

问题：旧方案使用计数器模式，每次acquire都刷新TTL，导致僵尸数据永不过期

解决方案：
- 每个槽位使用独立Redis Key: concurrency:account:{id}:{requestID}
- 利用Redis原生TTL，每个槽位独立5分钟过期
- 服务崩溃后僵尸数据自动清理，无需手动干预
- 兼容多实例K8s部署

技术改动：
- 新增SCAN脚本统计活跃槽位数量
- 移除冗余的releaseScript，直接使用DEL命令
- Wait队列TTL只在首次创建时设置，避免刷新

											
										
										
											2025-12-24 21:00:29 +08:00
+									if err != nil {
 										return 0, err
 									}
 									return result, nil
-												refactor(backend): 添加 service 缓存端口

											
										
										
											2025-12-19 23:39:28 +08:00
+								}
-												fix(concurrency): 重构并发管理使用独立Key+原生TTL

问题：旧方案使用计数器模式，每次acquire都刷新TTL，导致僵尸数据永不过期

解决方案：
- 每个槽位使用独立Redis Key: concurrency:account:{id}:{requestID}
- 利用Redis原生TTL，每个槽位独立5分钟过期
- 服务崩溃后僵尸数据自动清理，无需手动干预
- 兼容多实例K8s部署

技术改动：
- 新增SCAN脚本统计活跃槽位数量
- 移除冗余的releaseScript，直接使用DEL命令
- Wait队列TTL只在首次创建时设置，避免刷新

											
										
										
											2025-12-24 21:00:29 +08:00
+								// Wait queue operations
-												refactor(backend): 添加 service 缓存端口

											
										
										
											2025-12-19 23:39:28 +08:00
+								func (c *concurrencyCache) IncrementWaitCount(ctx context.Context, userID int64, maxWait int) (bool, error) {
-												perf(gateway): 优化负载感知调度

主要改进：
- 优化负载感知调度的准确性和响应速度
- 将 AccountUsageService 的包级缓存改为依赖注入
- 修复 SSE/JSON 转义和 nil 安全问题
- 恢复 Google One 功能兼容性

											
										
										
											2026-01-03 06:32:51 -08:00
+									key := waitQueueKey(userID)
 									result, err := incrementWaitScript.Run(ctx, c.rdb, []string{key}, maxWait, c.waitQueueTTLSeconds).Int()
-												refactor(backend): 添加 service 缓存端口

											
										
										
											2025-12-19 23:39:28 +08:00
+									if err != nil {
 										return false, err
 									}
 									return result == 1, nil
 								}
 								func (c *concurrencyCache) DecrementWaitCount(ctx context.Context, userID int64) error {
-												perf(gateway): 优化负载感知调度

主要改进：
- 优化负载感知调度的准确性和响应速度
- 将 AccountUsageService 的包级缓存改为依赖注入
- 修复 SSE/JSON 转义和 nil 安全问题
- 恢复 Google One 功能兼容性

											
										
										
											2026-01-03 06:32:51 -08:00
+									key := waitQueueKey(userID)
 									_, err := decrementWaitScript.Run(ctx, c.rdb, []string{key}).Result()
-												refactor(backend): 添加 service 缓存端口

											
										
										
											2025-12-19 23:39:28 +08:00
+									return err
 								}
-												feat(gateway): 实现负载感知的账号调度优化

- 新增调度配置：粘性会话排队、兜底排队、负载计算、槽位清理
- 实现账号级等待队列和批量负载查询（Redis Lua 脚本）
- 三层选择策略：粘性会话优先 → 负载感知选择 → 兜底排队
- 后台定期清理过期槽位，防止资源泄漏
- 集成到所有网关处理器（Claude/Gemini/OpenAI）

											
										
										
											2026-01-01 04:01:51 +08:00
 								// Account wait queue operations
 								func (c *concurrencyCache) IncrementAccountWaitCount(ctx context.Context, accountID int64, maxWait int) (bool, error) {
 									key := accountWaitKey(accountID)
 									result, err := incrementAccountWaitScript.Run(ctx, c.rdb, []string{key}, maxWait, c.waitQueueTTLSeconds).Int()
 									if err != nil {
 										return false, err
 									}
 									return result == 1, nil
 								}
 								func (c *concurrencyCache) DecrementAccountWaitCount(ctx context.Context, accountID int64) error {
 									key := accountWaitKey(accountID)
-												perf(gateway): 优化负载感知调度

主要改进：
- 优化负载感知调度的准确性和响应速度
- 将 AccountUsageService 的包级缓存改为依赖注入
- 修复 SSE/JSON 转义和 nil 安全问题
- 恢复 Google One 功能兼容性

											
										
										
											2026-01-03 06:32:51 -08:00
+									_, err := decrementWaitScript.Run(ctx, c.rdb, []string{key}).Result()
-												feat(gateway): 实现负载感知的账号调度优化

- 新增调度配置：粘性会话排队、兜底排队、负载计算、槽位清理
- 实现账号级等待队列和批量负载查询（Redis Lua 脚本）
- 三层选择策略：粘性会话优先 → 负载感知选择 → 兜底排队
- 后台定期清理过期槽位，防止资源泄漏
- 集成到所有网关处理器（Claude/Gemini/OpenAI）

											
										
										
											2026-01-01 04:01:51 +08:00
+									return err
 								}
 								func (c *concurrencyCache) GetAccountWaitingCount(ctx context.Context, accountID int64) (int, error) {
 									key := accountWaitKey(accountID)
 									val, err := c.rdb.Get(ctx, key).Int()
 									if err != nil && !errors.Is(err, redis.Nil) {
 										return 0, err
 									}
 									if errors.Is(err, redis.Nil) {
 										return 0, nil
 									}
 									return val, nil
 								}
 								func (c *concurrencyCache) GetAccountsLoadBatch(ctx context.Context, accounts []service.AccountWithConcurrency) (map[int64]*service.AccountLoadInfo, error) {
 									if len(accounts) == 0 {
 										return map[int64]*service.AccountLoadInfo{}, nil
 									}
-												fix(backend): 修复代码审核发现的 8 个确认问题

- P0-1: subscription_maintenance_queue 使用 RWMutex 防止 channel close/send 竞态
- P0-2: billing_service CalculateCostWithLongContext 修复被吞没的 out-range 错误
- P1-1: timing_wheel_service Schedule/ScheduleRecurring 添加 SetTimer 错误日志
- P1-2: sora_gateway_service StoreFromURLs 失败时降级使用原始 URL
- P1-3: concurrency_cache 用 Pipeline 替代 Lua 脚本兼容 Redis Cluster
- P1-6: sora_media_cleanup_service runCleanup 添加 nil cfg/storage 防护

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

											
										
										
											2026-02-10 17:51:49 +08:00
+									// 使用 Pipeline 替代 Lua 脚本，兼容 Redis Cluster（Lua 内动态拼 key 会 CROSSSLOT）。
 									// 每个账号执行 3 个命令：ZREMRANGEBYSCORE（清理过期）、ZCARD（并发数）、GET（等待数）。
 									now, err := c.rdb.Time(ctx).Result()
-												feat(gateway): 实现负载感知的账号调度优化

- 新增调度配置：粘性会话排队、兜底排队、负载计算、槽位清理
- 实现账号级等待队列和批量负载查询（Redis Lua 脚本）
- 三层选择策略：粘性会话优先 → 负载感知选择 → 兜底排队
- 后台定期清理过期槽位，防止资源泄漏
- 集成到所有网关处理器（Claude/Gemini/OpenAI）

											
										
										
											2026-01-01 04:01:51 +08:00
+									if err != nil {
-												fix(backend): 修复代码审核发现的 8 个确认问题

- P0-1: subscription_maintenance_queue 使用 RWMutex 防止 channel close/send 竞态
- P0-2: billing_service CalculateCostWithLongContext 修复被吞没的 out-range 错误
- P1-1: timing_wheel_service Schedule/ScheduleRecurring 添加 SetTimer 错误日志
- P1-2: sora_gateway_service StoreFromURLs 失败时降级使用原始 URL
- P1-3: concurrency_cache 用 Pipeline 替代 Lua 脚本兼容 Redis Cluster
- P1-6: sora_media_cleanup_service runCleanup 添加 nil cfg/storage 防护

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

											
										
										
											2026-02-10 17:51:49 +08:00
+										return nil, fmt.Errorf("redis TIME: %w", err)
-												feat(gateway): 实现负载感知的账号调度优化

- 新增调度配置：粘性会话排队、兜底排队、负载计算、槽位清理
- 实现账号级等待队列和批量负载查询（Redis Lua 脚本）
- 三层选择策略：粘性会话优先 → 负载感知选择 → 兜底排队
- 后台定期清理过期槽位，防止资源泄漏
- 集成到所有网关处理器（Claude/Gemini/OpenAI）

											
										
										
											2026-01-01 04:01:51 +08:00
+									}
-												fix(backend): 修复代码审核发现的 8 个确认问题

- P0-1: subscription_maintenance_queue 使用 RWMutex 防止 channel close/send 竞态
- P0-2: billing_service CalculateCostWithLongContext 修复被吞没的 out-range 错误
- P1-1: timing_wheel_service Schedule/ScheduleRecurring 添加 SetTimer 错误日志
- P1-2: sora_gateway_service StoreFromURLs 失败时降级使用原始 URL
- P1-3: concurrency_cache 用 Pipeline 替代 Lua 脚本兼容 Redis Cluster
- P1-6: sora_media_cleanup_service runCleanup 添加 nil cfg/storage 防护

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

											
										
										
											2026-02-10 17:51:49 +08:00
+									cutoffTime := now.Unix() - int64(c.slotTTLSeconds)
-												feat(gateway): 实现负载感知的账号调度优化

- 新增调度配置：粘性会话排队、兜底排队、负载计算、槽位清理
- 实现账号级等待队列和批量负载查询（Redis Lua 脚本）
- 三层选择策略：粘性会话优先 → 负载感知选择 → 兜底排队
- 后台定期清理过期槽位，防止资源泄漏
- 集成到所有网关处理器（Claude/Gemini/OpenAI）

											
										
										
											2026-01-01 04:01:51 +08:00
-												fix(backend): 修复代码审核发现的 8 个确认问题

- P0-1: subscription_maintenance_queue 使用 RWMutex 防止 channel close/send 竞态
- P0-2: billing_service CalculateCostWithLongContext 修复被吞没的 out-range 错误
- P1-1: timing_wheel_service Schedule/ScheduleRecurring 添加 SetTimer 错误日志
- P1-2: sora_gateway_service StoreFromURLs 失败时降级使用原始 URL
- P1-3: concurrency_cache 用 Pipeline 替代 Lua 脚本兼容 Redis Cluster
- P1-6: sora_media_cleanup_service runCleanup 添加 nil cfg/storage 防护

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

											
										
										
											2026-02-10 17:51:49 +08:00
+									pipe := c.rdb.Pipeline()
 									type accountCmds struct {
 										id             int64
 										maxConcurrency int
 										zcardCmd       *redis.IntCmd
 										getCmd         *redis.StringCmd
 									}
 									cmds := make([]accountCmds, 0, len(accounts))
 									for _, acc := range accounts {
 										slotKey := accountSlotKeyPrefix + strconv.FormatInt(acc.ID, 10)
 										waitKey := accountWaitKeyPrefix + strconv.FormatInt(acc.ID, 10)
 										pipe.ZRemRangeByScore(ctx, slotKey, "-inf", strconv.FormatInt(cutoffTime, 10))
 										ac := accountCmds{
 											id:             acc.ID,
 											maxConcurrency: acc.MaxConcurrency,
 											zcardCmd:       pipe.ZCard(ctx, slotKey),
 											getCmd:         pipe.Get(ctx, waitKey),
-												feat(gateway): 实现负载感知的账号调度优化

- 新增调度配置：粘性会话排队、兜底排队、负载计算、槽位清理
- 实现账号级等待队列和批量负载查询（Redis Lua 脚本）
- 三层选择策略：粘性会话优先 → 负载感知选择 → 兜底排队
- 后台定期清理过期槽位，防止资源泄漏
- 集成到所有网关处理器（Claude/Gemini/OpenAI）

											
										
										
											2026-01-01 04:01:51 +08:00
+										}
-												fix(backend): 修复代码审核发现的 8 个确认问题

- P0-1: subscription_maintenance_queue 使用 RWMutex 防止 channel close/send 竞态
- P0-2: billing_service CalculateCostWithLongContext 修复被吞没的 out-range 错误
- P1-1: timing_wheel_service Schedule/ScheduleRecurring 添加 SetTimer 错误日志
- P1-2: sora_gateway_service StoreFromURLs 失败时降级使用原始 URL
- P1-3: concurrency_cache 用 Pipeline 替代 Lua 脚本兼容 Redis Cluster
- P1-6: sora_media_cleanup_service runCleanup 添加 nil cfg/storage 防护

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

											
										
										
											2026-02-10 17:51:49 +08:00
+										cmds = append(cmds, ac)
 									}
-												feat(gateway): 实现负载感知的账号调度优化

- 新增调度配置：粘性会话排队、兜底排队、负载计算、槽位清理
- 实现账号级等待队列和批量负载查询（Redis Lua 脚本）
- 三层选择策略：粘性会话优先 → 负载感知选择 → 兜底排队
- 后台定期清理过期槽位，防止资源泄漏
- 集成到所有网关处理器（Claude/Gemini/OpenAI）

											
										
										
											2026-01-01 04:01:51 +08:00
-												fix(backend): 修复代码审核发现的 8 个确认问题

- P0-1: subscription_maintenance_queue 使用 RWMutex 防止 channel close/send 竞态
- P0-2: billing_service CalculateCostWithLongContext 修复被吞没的 out-range 错误
- P1-1: timing_wheel_service Schedule/ScheduleRecurring 添加 SetTimer 错误日志
- P1-2: sora_gateway_service StoreFromURLs 失败时降级使用原始 URL
- P1-3: concurrency_cache 用 Pipeline 替代 Lua 脚本兼容 Redis Cluster
- P1-6: sora_media_cleanup_service runCleanup 添加 nil cfg/storage 防护

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

											
										
										
											2026-02-10 17:51:49 +08:00
+									if _, err := pipe.Exec(ctx); err != nil && !errors.Is(err, redis.Nil) {
 										return nil, fmt.Errorf("pipeline exec: %w", err)
 									}
-												feat(gateway): 实现负载感知的账号调度优化

- 新增调度配置：粘性会话排队、兜底排队、负载计算、槽位清理
- 实现账号级等待队列和批量负载查询（Redis Lua 脚本）
- 三层选择策略：粘性会话优先 → 负载感知选择 → 兜底排队
- 后台定期清理过期槽位，防止资源泄漏
- 集成到所有网关处理器（Claude/Gemini/OpenAI）

											
										
										
											2026-01-01 04:01:51 +08:00
-												fix(backend): 修复代码审核发现的 8 个确认问题

- P0-1: subscription_maintenance_queue 使用 RWMutex 防止 channel close/send 竞态
- P0-2: billing_service CalculateCostWithLongContext 修复被吞没的 out-range 错误
- P1-1: timing_wheel_service Schedule/ScheduleRecurring 添加 SetTimer 错误日志
- P1-2: sora_gateway_service StoreFromURLs 失败时降级使用原始 URL
- P1-3: concurrency_cache 用 Pipeline 替代 Lua 脚本兼容 Redis Cluster
- P1-6: sora_media_cleanup_service runCleanup 添加 nil cfg/storage 防护

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

											
										
										
											2026-02-10 17:51:49 +08:00
+									loadMap := make(map[int64]*service.AccountLoadInfo, len(accounts))
 									for _, ac := range cmds {
 										currentConcurrency := int(ac.zcardCmd.Val())
 										waitingCount := 0
 										if v, err := ac.getCmd.Int(); err == nil {
 											waitingCount = v
 										}
 										loadRate := 0
 										if ac.maxConcurrency > 0 {
 											loadRate = (currentConcurrency + waitingCount) * 100 / ac.maxConcurrency
 										}
 										loadMap[ac.id] = &service.AccountLoadInfo{
 											AccountID:          ac.id,
-												feat(gateway): 实现负载感知的账号调度优化

- 新增调度配置：粘性会话排队、兜底排队、负载计算、槽位清理
- 实现账号级等待队列和批量负载查询（Redis Lua 脚本）
- 三层选择策略：粘性会话优先 → 负载感知选择 → 兜底排队
- 后台定期清理过期槽位，防止资源泄漏
- 集成到所有网关处理器（Claude/Gemini/OpenAI）

											
										
										
											2026-01-01 04:01:51 +08:00
+											CurrentConcurrency: currentConcurrency,
 											WaitingCount:       waitingCount,
 											LoadRate:           loadRate,
 										}
 									}
 									return loadMap, nil
 								}
-												feat(antigravity): comprehensive enhancements - model mapping, rate limiting, scheduling & ops

Key changes:
- Upgrade model mapping: Opus 4.5 → Opus 4.6-thinking with precise matching
- Unified rate limiting: scope-level → model-level with Redis snapshot sync
- Load-balanced scheduling by call count with smart retry mechanism
- Force cache billing support
- Model identity injection in prompts with leak prevention
- Thinking mode auto-handling (max_tokens/budget_tokens fix)
- Frontend: whitelist mode toggle, model mapping validation, status indicators
- Gemini session fallback with Redis Trie O(L) matching
- Ops: enhanced concurrency monitoring, account availability, retry logic
- Migration scripts: 049-051 for model mapping unification

											
										
										
											2026-02-07 12:31:10 +08:00
+								func (c *concurrencyCache) GetUsersLoadBatch(ctx context.Context, users []service.UserWithConcurrency) (map[int64]*service.UserLoadInfo, error) {
 									if len(users) == 0 {
 										return map[int64]*service.UserLoadInfo{}, nil
 									}
-												fix(backend): 修复代码审核发现的 8 个确认问题

- P0-1: subscription_maintenance_queue 使用 RWMutex 防止 channel close/send 竞态
- P0-2: billing_service CalculateCostWithLongContext 修复被吞没的 out-range 错误
- P1-1: timing_wheel_service Schedule/ScheduleRecurring 添加 SetTimer 错误日志
- P1-2: sora_gateway_service StoreFromURLs 失败时降级使用原始 URL
- P1-3: concurrency_cache 用 Pipeline 替代 Lua 脚本兼容 Redis Cluster
- P1-6: sora_media_cleanup_service runCleanup 添加 nil cfg/storage 防护

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

											
										
										
											2026-02-10 17:51:49 +08:00
+									// 使用 Pipeline 替代 Lua 脚本，兼容 Redis Cluster。
 									now, err := c.rdb.Time(ctx).Result()
-												feat(antigravity): comprehensive enhancements - model mapping, rate limiting, scheduling & ops

Key changes:
- Upgrade model mapping: Opus 4.5 → Opus 4.6-thinking with precise matching
- Unified rate limiting: scope-level → model-level with Redis snapshot sync
- Load-balanced scheduling by call count with smart retry mechanism
- Force cache billing support
- Model identity injection in prompts with leak prevention
- Thinking mode auto-handling (max_tokens/budget_tokens fix)
- Frontend: whitelist mode toggle, model mapping validation, status indicators
- Gemini session fallback with Redis Trie O(L) matching
- Ops: enhanced concurrency monitoring, account availability, retry logic
- Migration scripts: 049-051 for model mapping unification

											
										
										
											2026-02-07 12:31:10 +08:00
+									if err != nil {
-												fix(backend): 修复代码审核发现的 8 个确认问题

- P0-1: subscription_maintenance_queue 使用 RWMutex 防止 channel close/send 竞态
- P0-2: billing_service CalculateCostWithLongContext 修复被吞没的 out-range 错误
- P1-1: timing_wheel_service Schedule/ScheduleRecurring 添加 SetTimer 错误日志
- P1-2: sora_gateway_service StoreFromURLs 失败时降级使用原始 URL
- P1-3: concurrency_cache 用 Pipeline 替代 Lua 脚本兼容 Redis Cluster
- P1-6: sora_media_cleanup_service runCleanup 添加 nil cfg/storage 防护

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

											
										
										
											2026-02-10 17:51:49 +08:00
+										return nil, fmt.Errorf("redis TIME: %w", err)
-												feat(antigravity): comprehensive enhancements - model mapping, rate limiting, scheduling & ops

Key changes:
- Upgrade model mapping: Opus 4.5 → Opus 4.6-thinking with precise matching
- Unified rate limiting: scope-level → model-level with Redis snapshot sync
- Load-balanced scheduling by call count with smart retry mechanism
- Force cache billing support
- Model identity injection in prompts with leak prevention
- Thinking mode auto-handling (max_tokens/budget_tokens fix)
- Frontend: whitelist mode toggle, model mapping validation, status indicators
- Gemini session fallback with Redis Trie O(L) matching
- Ops: enhanced concurrency monitoring, account availability, retry logic
- Migration scripts: 049-051 for model mapping unification

											
										
										
											2026-02-07 12:31:10 +08:00
+									}
-												fix(backend): 修复代码审核发现的 8 个确认问题

- P0-1: subscription_maintenance_queue 使用 RWMutex 防止 channel close/send 竞态
- P0-2: billing_service CalculateCostWithLongContext 修复被吞没的 out-range 错误
- P1-1: timing_wheel_service Schedule/ScheduleRecurring 添加 SetTimer 错误日志
- P1-2: sora_gateway_service StoreFromURLs 失败时降级使用原始 URL
- P1-3: concurrency_cache 用 Pipeline 替代 Lua 脚本兼容 Redis Cluster
- P1-6: sora_media_cleanup_service runCleanup 添加 nil cfg/storage 防护

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

											
										
										
											2026-02-10 17:51:49 +08:00
+									cutoffTime := now.Unix() - int64(c.slotTTLSeconds)
-												feat(antigravity): comprehensive enhancements - model mapping, rate limiting, scheduling & ops

Key changes:
- Upgrade model mapping: Opus 4.5 → Opus 4.6-thinking with precise matching
- Unified rate limiting: scope-level → model-level with Redis snapshot sync
- Load-balanced scheduling by call count with smart retry mechanism
- Force cache billing support
- Model identity injection in prompts with leak prevention
- Thinking mode auto-handling (max_tokens/budget_tokens fix)
- Frontend: whitelist mode toggle, model mapping validation, status indicators
- Gemini session fallback with Redis Trie O(L) matching
- Ops: enhanced concurrency monitoring, account availability, retry logic
- Migration scripts: 049-051 for model mapping unification

											
										
										
											2026-02-07 12:31:10 +08:00
-												fix(backend): 修复代码审核发现的 8 个确认问题

- P0-1: subscription_maintenance_queue 使用 RWMutex 防止 channel close/send 竞态
- P0-2: billing_service CalculateCostWithLongContext 修复被吞没的 out-range 错误
- P1-1: timing_wheel_service Schedule/ScheduleRecurring 添加 SetTimer 错误日志
- P1-2: sora_gateway_service StoreFromURLs 失败时降级使用原始 URL
- P1-3: concurrency_cache 用 Pipeline 替代 Lua 脚本兼容 Redis Cluster
- P1-6: sora_media_cleanup_service runCleanup 添加 nil cfg/storage 防护

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

											
										
										
											2026-02-10 17:51:49 +08:00
+									pipe := c.rdb.Pipeline()
 									type userCmds struct {
 										id             int64
 										maxConcurrency int
 										zcardCmd       *redis.IntCmd
 										getCmd         *redis.StringCmd
 									}
 									cmds := make([]userCmds, 0, len(users))
 									for _, u := range users {
 										slotKey := userSlotKeyPrefix + strconv.FormatInt(u.ID, 10)
 										waitKey := waitQueueKeyPrefix + strconv.FormatInt(u.ID, 10)
 										pipe.ZRemRangeByScore(ctx, slotKey, "-inf", strconv.FormatInt(cutoffTime, 10))
 										uc := userCmds{
 											id:             u.ID,
 											maxConcurrency: u.MaxConcurrency,
 											zcardCmd:       pipe.ZCard(ctx, slotKey),
 											getCmd:         pipe.Get(ctx, waitKey),
-												feat(antigravity): comprehensive enhancements - model mapping, rate limiting, scheduling & ops

Key changes:
- Upgrade model mapping: Opus 4.5 → Opus 4.6-thinking with precise matching
- Unified rate limiting: scope-level → model-level with Redis snapshot sync
- Load-balanced scheduling by call count with smart retry mechanism
- Force cache billing support
- Model identity injection in prompts with leak prevention
- Thinking mode auto-handling (max_tokens/budget_tokens fix)
- Frontend: whitelist mode toggle, model mapping validation, status indicators
- Gemini session fallback with Redis Trie O(L) matching
- Ops: enhanced concurrency monitoring, account availability, retry logic
- Migration scripts: 049-051 for model mapping unification

											
										
										
											2026-02-07 12:31:10 +08:00
+										}
-												fix(backend): 修复代码审核发现的 8 个确认问题

- P0-1: subscription_maintenance_queue 使用 RWMutex 防止 channel close/send 竞态
- P0-2: billing_service CalculateCostWithLongContext 修复被吞没的 out-range 错误
- P1-1: timing_wheel_service Schedule/ScheduleRecurring 添加 SetTimer 错误日志
- P1-2: sora_gateway_service StoreFromURLs 失败时降级使用原始 URL
- P1-3: concurrency_cache 用 Pipeline 替代 Lua 脚本兼容 Redis Cluster
- P1-6: sora_media_cleanup_service runCleanup 添加 nil cfg/storage 防护

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

											
										
										
											2026-02-10 17:51:49 +08:00
+										cmds = append(cmds, uc)
 									}
-												feat(antigravity): comprehensive enhancements - model mapping, rate limiting, scheduling & ops

Key changes:
- Upgrade model mapping: Opus 4.5 → Opus 4.6-thinking with precise matching
- Unified rate limiting: scope-level → model-level with Redis snapshot sync
- Load-balanced scheduling by call count with smart retry mechanism
- Force cache billing support
- Model identity injection in prompts with leak prevention
- Thinking mode auto-handling (max_tokens/budget_tokens fix)
- Frontend: whitelist mode toggle, model mapping validation, status indicators
- Gemini session fallback with Redis Trie O(L) matching
- Ops: enhanced concurrency monitoring, account availability, retry logic
- Migration scripts: 049-051 for model mapping unification

											
										
										
											2026-02-07 12:31:10 +08:00
-												fix(backend): 修复代码审核发现的 8 个确认问题

- P0-1: subscription_maintenance_queue 使用 RWMutex 防止 channel close/send 竞态
- P0-2: billing_service CalculateCostWithLongContext 修复被吞没的 out-range 错误
- P1-1: timing_wheel_service Schedule/ScheduleRecurring 添加 SetTimer 错误日志
- P1-2: sora_gateway_service StoreFromURLs 失败时降级使用原始 URL
- P1-3: concurrency_cache 用 Pipeline 替代 Lua 脚本兼容 Redis Cluster
- P1-6: sora_media_cleanup_service runCleanup 添加 nil cfg/storage 防护

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

											
										
										
											2026-02-10 17:51:49 +08:00
+									if _, err := pipe.Exec(ctx); err != nil && !errors.Is(err, redis.Nil) {
 										return nil, fmt.Errorf("pipeline exec: %w", err)
 									}
-												feat(antigravity): comprehensive enhancements - model mapping, rate limiting, scheduling & ops

Key changes:
- Upgrade model mapping: Opus 4.5 → Opus 4.6-thinking with precise matching
- Unified rate limiting: scope-level → model-level with Redis snapshot sync
- Load-balanced scheduling by call count with smart retry mechanism
- Force cache billing support
- Model identity injection in prompts with leak prevention
- Thinking mode auto-handling (max_tokens/budget_tokens fix)
- Frontend: whitelist mode toggle, model mapping validation, status indicators
- Gemini session fallback with Redis Trie O(L) matching
- Ops: enhanced concurrency monitoring, account availability, retry logic
- Migration scripts: 049-051 for model mapping unification

											
										
										
											2026-02-07 12:31:10 +08:00
-												fix(backend): 修复代码审核发现的 8 个确认问题

- P0-1: subscription_maintenance_queue 使用 RWMutex 防止 channel close/send 竞态
- P0-2: billing_service CalculateCostWithLongContext 修复被吞没的 out-range 错误
- P1-1: timing_wheel_service Schedule/ScheduleRecurring 添加 SetTimer 错误日志
- P1-2: sora_gateway_service StoreFromURLs 失败时降级使用原始 URL
- P1-3: concurrency_cache 用 Pipeline 替代 Lua 脚本兼容 Redis Cluster
- P1-6: sora_media_cleanup_service runCleanup 添加 nil cfg/storage 防护

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

											
										
										
											2026-02-10 17:51:49 +08:00
+									loadMap := make(map[int64]*service.UserLoadInfo, len(users))
 									for _, uc := range cmds {
 										currentConcurrency := int(uc.zcardCmd.Val())
 										waitingCount := 0
 										if v, err := uc.getCmd.Int(); err == nil {
 											waitingCount = v
 										}
 										loadRate := 0
 										if uc.maxConcurrency > 0 {
 											loadRate = (currentConcurrency + waitingCount) * 100 / uc.maxConcurrency
 										}
 										loadMap[uc.id] = &service.UserLoadInfo{
 											UserID:             uc.id,
-												feat(antigravity): comprehensive enhancements - model mapping, rate limiting, scheduling & ops

Key changes:
- Upgrade model mapping: Opus 4.5 → Opus 4.6-thinking with precise matching
- Unified rate limiting: scope-level → model-level with Redis snapshot sync
- Load-balanced scheduling by call count with smart retry mechanism
- Force cache billing support
- Model identity injection in prompts with leak prevention
- Thinking mode auto-handling (max_tokens/budget_tokens fix)
- Frontend: whitelist mode toggle, model mapping validation, status indicators
- Gemini session fallback with Redis Trie O(L) matching
- Ops: enhanced concurrency monitoring, account availability, retry logic
- Migration scripts: 049-051 for model mapping unification

											
										
										
											2026-02-07 12:31:10 +08:00
+											CurrentConcurrency: currentConcurrency,
 											WaitingCount:       waitingCount,
 											LoadRate:           loadRate,
 										}
 									}
 									return loadMap, nil
 								}
-												feat(gateway): 实现负载感知的账号调度优化

- 新增调度配置：粘性会话排队、兜底排队、负载计算、槽位清理
- 实现账号级等待队列和批量负载查询（Redis Lua 脚本）
- 三层选择策略：粘性会话优先 → 负载感知选择 → 兜底排队
- 后台定期清理过期槽位，防止资源泄漏
- 集成到所有网关处理器（Claude/Gemini/OpenAI）

											
										
										
											2026-01-01 04:01:51 +08:00
+								func (c *concurrencyCache) CleanupExpiredAccountSlots(ctx context.Context, accountID int64) error {
 									key := accountSlotKey(accountID)
 									_, err := cleanupExpiredSlotsScript.Run(ctx, c.rdb, []string{key}, c.slotTTLSeconds).Result()
 									return err
 								}
-												feat: cleanup stale concurrency slots on startup

When the service restarts, concurrency slots from the old process
remain in Redis, causing phantom occupancy. On startup, scan all
concurrency sorted sets and remove members with non-current process
prefix, then clear orphaned wait queue counters.

Uses Go-side SCAN to discover keys (compatible with Redis client
prefix hooks in tests), then passes them to a Lua script for
atomic member-level cleanup.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

											
										
										
											2026-03-09 19:55:18 +08:00
 								// CleanupStaleProcessSlots removes concurrency state that does not belong
 								// to the current process. It is a no-op when activeRequestPrefix is empty.
 								//
 								// It first cleans the account and user slot sorted sets via
 								// cleanupSlotsByPattern, then deletes every account-level and user-level
 								// wait-queue counter. The first error encountered aborts the cleanup and
 								// is returned.
 								func (c *concurrencyCache) CleanupStaleProcessSlots(ctx context.Context, activeRequestPrefix string) error {
 									if activeRequestPrefix == "" {
 										return nil
 									}
 									// Step 1: purge sorted-set members that lack the active process prefix.
 									for _, slotPattern := range []string{accountSlotKeyPrefix + "*", userSlotKeyPrefix + "*"} {
 										cleanupErr := c.cleanupSlotsByPattern(ctx, slotPattern, activeRequestPrefix)
 										if cleanupErr != nil {
 											return cleanupErr
 										}
 									}
 									// Step 2: drop all wait-queue counters; they are invalid after a restart.
 									for _, waitPattern := range []string{accountWaitKeyPrefix + "*", waitQueueKeyPrefix + "*"} {
 										deleteErr := c.deleteKeysByPattern(ctx, waitPattern)
 										if deleteErr != nil {
 											return deleteErr
 										}
 									}
 									return nil
 								}
 								// cleanupSlotsByPattern scans for sorted-set keys matching pattern and
 								// hands each scanned batch to startupCleanupScript, which removes members
 								// that do not belong to the current process. Scan and script errors are
 								// returned wrapped with the pattern.
 								func (c *concurrencyCache) cleanupSlotsByPattern(ctx context.Context, pattern, activePrefix string) error {
 									const batchHint = 200
 									cursor := uint64(0)
 									for {
 										batch, next, scanErr := c.rdb.Scan(ctx, cursor, pattern, batchHint).Result()
 										if scanErr != nil {
 											return fmt.Errorf("scan %s: %w", pattern, scanErr)
 										}
 										// SCAN may return an empty page before the cursor wraps; skip those.
 										if len(batch) != 0 {
 											if _, runErr := startupCleanupScript.Run(ctx, c.rdb, batch, activePrefix, c.slotTTLSeconds).Result(); runErr != nil {
 												return fmt.Errorf("cleanup slots %s: %w", pattern, runErr)
 											}
 										}
 										// A zero cursor marks the end of the iteration.
 										if next == 0 {
 											return nil
 										}
 										cursor = next
 									}
 								}
 								// deleteKeysByPattern scans for keys matching pattern and deletes each
 								// scanned batch with a single DEL. Scan and delete errors are returned
 								// wrapped with the pattern.
 								func (c *concurrencyCache) deleteKeysByPattern(ctx context.Context, pattern string) error {
 									const batchHint = 200
 									cursor := uint64(0)
 									for {
 										batch, next, scanErr := c.rdb.Scan(ctx, cursor, pattern, batchHint).Result()
 										if scanErr != nil {
 											return fmt.Errorf("scan %s: %w", pattern, scanErr)
 										}
 										// SCAN may return an empty page before the cursor wraps; skip those.
 										if len(batch) != 0 {
 											delErr := c.rdb.Del(ctx, batch...).Err()
 											if delErr != nil {
 												return fmt.Errorf("del %s: %w", pattern, delErr)
 											}
 										}
 										// A zero cursor marks the end of the iteration.
 										if next == 0 {
 											return nil
 										}
 										cursor = next
 									}
 								}