// backend/internal/service/gateway_request.go

package service

import (
	"bytes"
	"encoding/json"
	"fmt"
	"math"
	"strings"
	"unsafe"

	"github.com/Wei-Shaw/sub2api/internal/domain"
	"github.com/Wei-Shaw/sub2api/internal/pkg/antigravity"
	"github.com/tidwall/gjson"
	"github.com/tidwall/sjson"
)

var (
	// Byte patterns used by the fast-path checks in this file. Declaring them once
	// avoids creating a temporary []byte("...") on every call.
	//
	// Each pattern comes in a compact form and one or more whitespace variants,
	// because the checks are plain substring searches over the raw JSON body.
	patternTypeThinking         = []byte(`"type":"thinking"`)
	patternTypeThinkingSpaced   = []byte(`"type": "thinking"`)
	patternTypeRedactedThinking = []byte(`"type":"redacted_thinking"`)
	patternTypeRedactedSpaced   = []byte(`"type": "redacted_thinking"`)

	// A "thinking" key in any position (top-level field or nested inside a block).
	patternThinkingField       = []byte(`"thinking":`)
	patternThinkingFieldSpaced = []byte(`"thinking" :`)

	// An empty content array, in the four spacing variants that are checked.
	patternEmptyContent       = []byte(`"content":[]`)
	patternEmptyContentSpaced = []byte(`"content": []`)
	patternEmptyContentSp1    = []byte(`"content" : []`)
	patternEmptyContentSp2    = []byte(`"content" :[]`)
)

// SessionContext is the sticky-session context used to tell requests from
// different origins apart.
// It is mixed in only at the third-level fallback of GenerateSessionHash (the
// message-content hash), so that different users sending the same message do not
// produce the same hash and end up concentrated on one account.
type SessionContext struct {
	ClientIP  string
	UserAgent string
	APIKeyID  int64
}

// ParsedRequest holds the pre-parsed result of a gateway request.
//
// Performance rationale:
// The previous implementation parsed the request body repeatedly (once in the
// handler, again in the service):
// 1. gateway_handler.go parsed it to obtain model and stream
// 2. gateway_service.go parsed it again to obtain system, messages and metadata
// 3. GenerateSessionHash parsed it once more for the fields the session hash needs
//
// The current implementation parses once and reuses the result:
// 1. The handler layer calls ParseGatewayRequest a single time
// 2. The resulting ParsedRequest is passed down to the service layer
// 3. Repeated json.Unmarshal calls are avoided, reducing CPU and memory overhead
type ParsedRequest struct {
	Body            []byte          // Raw request body (kept for forwarding)
	Model           string          // Requested model name
	Stream          bool            // Whether this is a streaming request
	MetadataUserID  string          // metadata.user_id (used for session affinity)
	System          any             // Content of the system field
	Messages        []any           // The messages array
	HasSystem       bool            // Whether a system field is present (an explicit null also counts)
	ThinkingEnabled bool            // Whether thinking is on (on some platforms this affects the final model name)
	MaxTokens       int             // max_tokens value (used to intercept probe requests)
	SessionContext  *SessionContext // Optional: request-origin discriminator (behavior is unchanged when nil)

	// OnUpstreamAccepted is called as soon as the upstream accepts the request
	// (used to release the serialization lock early).
	// For streaming requests it is called once the 2xx response headers arrive,
	// so the lock is not held until the stream completes.
	OnUpstreamAccepted func()
}

// ParseGatewayRequest parses a gateway request body into a ParsedRequest.
//
// protocol selects the request wire format (domain.PlatformAnthropic /
// domain.PlatformGemini); the two formats use different field names for the
// system prompt and the message list. Any protocol other than
// domain.PlatformGemini is parsed as the Anthropic / OpenAI shape.
//
// An error is returned when body is not valid JSON, when "model" is present but
// not a string, when "stream" is present but not a boolean, or when the
// system/messages sub-tree cannot be unmarshaled. On error the returned
// *ParsedRequest is nil.
//
// The returned ParsedRequest.Body aliases body; it is not copied.
func ParseGatewayRequest(body []byte, protocol string) (*ParsedRequest, error) {
	// Same contract as the previous implementation: the body must be valid JSON.
	// gjson does not report an error for malformed JSON, so validate explicitly.
	if !gjson.ValidBytes(body) {
		return nil, fmt.Errorf("invalid json")
	}

	// Performance:
	// - gjson.GetBytes copies the matched Raw/Str into a new string, which is an
	//   extra copy for a huge messages array.
	// - View body as a string without copying instead. The string is used only
	//   inside this function and body is not modified while it is alive.
	jsonStr := *(*string)(unsafe.Pointer(&body))

	parsed := &ParsedRequest{
		Body: body,
	}

	// --- Simple fields via gjson (no full Unmarshal) ---

	// model: strictly typed; anything other than a string is an error.
	modelResult := gjson.Get(jsonStr, "model")
	if modelResult.Exists() {
		if modelResult.Type != gjson.String {
			return nil, fmt.Errorf("invalid model field type")
		}
		parsed.Model = modelResult.String()
	}

	// stream: strictly typed; anything other than a boolean is an error.
	streamResult := gjson.Get(jsonStr, "stream")
	if streamResult.Exists() {
		if streamResult.Type != gjson.True && streamResult.Type != gjson.False {
			return nil, fmt.Errorf("invalid stream field type")
		}
		parsed.Stream = streamResult.Bool()
	}

	// metadata.user_id: read by path, no strict type check.
	parsed.MetadataUserID = gjson.Get(jsonStr, "metadata.user_id").String()

	// thinking.type: both "enabled" and "adaptive" count as thinking being on.
	thinkingType := gjson.Get(jsonStr, "thinking.type").String()
	if thinkingType == "enabled" || thinkingType == "adaptive" {
		parsed.ThinkingEnabled = true
	}

	// max_tokens: only integral values that fit in int are accepted; anything
	// else leaves MaxTokens at zero.
	maxTokensResult := gjson.Get(jsonStr, "max_tokens")
	if maxTokensResult.Exists() && maxTokensResult.Type == gjson.Number {
		f := maxTokensResult.Float()
		// float64(math.MaxInt) rounds up to 2^63 on 64-bit platforms, so comparing
		// with "<=" would admit a value that int(f) cannot represent.
		// -float64(math.MinInt) is exactly representable and is the correct
		// exclusive upper bound on both 32-bit and 64-bit platforms.
		if !math.IsNaN(f) && !math.IsInf(f, 0) && f == math.Trunc(f) &&
			f >= float64(math.MinInt) && f < -float64(math.MinInt) {
			parsed.MaxTokens = int(f)
		}
	}

	// --- system / messages ---
	// Unmarshaling the whole body into a map would allocate many maps and
	// interfaces. Extract the Raw of the target field with gjson and unmarshal
	// only that sub-tree.

	switch protocol {
	case domain.PlatformGemini:
		// Gemini native format: systemInstruction.parts / contents
		if sysParts := gjson.Get(jsonStr, "systemInstruction.parts"); sysParts.Exists() && sysParts.IsArray() {
			var parts []any
			if err := json.Unmarshal(sliceRawFromBody(body, sysParts), &parts); err != nil {
				return nil, err
			}
			parsed.System = parts
		}

		if contents := gjson.Get(jsonStr, "contents"); contents.Exists() && contents.IsArray() {
			var msgs []any
			if err := json.Unmarshal(sliceRawFromBody(body, contents), &msgs); err != nil {
				return nil, err
			}
			parsed.Messages = msgs
		}
	default:
		// Anthropic / OpenAI format: system / messages
		// A system field counts as explicitly provided whenever it is present,
		// even when it is null, so that a client sending null does not get the
		// default system prompt injected by mistake.
		if sys := gjson.Get(jsonStr, "system"); sys.Exists() {
			parsed.HasSystem = true
			switch sys.Type {
			case gjson.Null:
				parsed.System = nil
			case gjson.String:
				// Same as encoding/json Unmarshal: store the decoded string.
				parsed.System = sys.String()
			default:
				var system any
				if err := json.Unmarshal(sliceRawFromBody(body, sys), &system); err != nil {
					return nil, err
				}
				parsed.System = system
			}
		}

		if msgs := gjson.Get(jsonStr, "messages"); msgs.Exists() && msgs.IsArray() {
			var messages []any
			if err := json.Unmarshal(sliceRawFromBody(body, msgs), &messages); err != nil {
				return nil, err
			}
			parsed.Messages = messages
		}
	}

	return parsed, nil
}

// sliceRawFromBody returns the bytes of r.Raw.
//
// When r.Index is positive and the span [r.Index, r.Index+len(r.Raw)) lies
// inside body, the result is a sub-slice of body, so a large field such as
// messages is not copied. Otherwise it falls back to converting r.Raw, which
// costs one copy but yields the same bytes.
func sliceRawFromBody(body []byte, r gjson.Result) []byte {
	start := r.Index
	if start <= 0 {
		// No usable position was reported for this result.
		return []byte(r.Raw)
	}

	stop := start + len(r.Raw)
	if stop > len(body) {
		// The reported span does not fit inside body; do not slice past the end.
		return []byte(r.Raw)
	}

	return body[start:stop]
}

// FilterThinkingBlocks removes thinking blocks from a request body.
// It returns the filtered body, or the original body when filtering fails
// (fail-safe). The purpose is to prevent upstream 400 errors caused by invalid
// thinking-block signatures.
//
// Strategy (implemented by filterThinkingBlocksInternal):
//   - When thinking.type is not "enabled"/"adaptive": remove all thinking-related blocks.
//   - When thinking.type is "enabled"/"adaptive": remove only thinking blocks whose
//     signature is missing, empty or the dummy signature (those would cause a 400).
func FilterThinkingBlocks(body []byte) []byte {
	return filterThinkingBlocksInternal(body, false)
}

// FilterThinkingBlocksForRetry strips thinking-related constructs for retry scenarios.
//
// Why:
//   - Upstreams may reject historical `thinking`/`redacted_thinking` blocks due to invalid/missing signatures.
//   - Anthropic extended thinking has a structural constraint: when top-level `thinking` is enabled and the
//     final message is an assistant prefill, the assistant content must start with a thinking block.
//   - If we remove thinking blocks but keep top-level `thinking` enabled, we can trigger:
//     "Expected `thinking` or `redacted_thinking`, but found `text`"
//
// Strategy (B: preserve content as text):
//   - Disable top-level `thinking` (remove `thinking` field).
//   - Convert `thinking` blocks to `text` blocks (preserve the thinking content).
//   - Remove `redacted_thinking` blocks (cannot be converted to text).
//   - Ensure no message ends up with empty content.
//
// The original body is returned unchanged when nothing needs rewriting, when
// there is no top-level "messages" array, or when any parse/rewrite step fails.
func FilterThinkingBlocksForRetry(body []byte) []byte {
	hasThinkingContent := bytes.Contains(body, patternTypeThinking) ||
		bytes.Contains(body, patternTypeThinkingSpaced) ||
		bytes.Contains(body, patternTypeRedactedThinking) ||
		bytes.Contains(body, patternTypeRedactedSpaced) ||
		bytes.Contains(body, patternThinkingField) ||
		bytes.Contains(body, patternThinkingFieldSpaced)

	// Also check for empty content arrays that need fixing.
	// Note: This is a heuristic check; the actual empty content handling is done below.
	hasEmptyContent := bytes.Contains(body, patternEmptyContent) ||
		bytes.Contains(body, patternEmptyContentSpaced) ||
		bytes.Contains(body, patternEmptyContentSp1) ||
		bytes.Contains(body, patternEmptyContentSp2)

	// Fast path: nothing to process
	if !hasThinkingContent && !hasEmptyContent {
		return body
	}

	// Avoid unmarshaling the whole body into a map (many map/interface allocations).
	// Pull out just the messages sub-tree with gjson; only messages is
	// unmarshaled/marshaled afterwards.
	jsonStr := *(*string)(unsafe.Pointer(&body))
	msgsRes := gjson.Get(jsonStr, "messages")
	if !msgsRes.Exists() || !msgsRes.IsArray() {
		return body
	}

	// Fast path: only the top-level thinking field has to go; messages stay untouched.
	// patternThinkingField may also match a nested field (e.g. tool_use.input.thinking),
	// so whether the top-level field exists must be decided with gjson, and the
	// compact pattern is deliberately left out of containsThinkingBlocks.
	// NOTE(review): a compact untyped block such as {"thinking":"..."} inside messages
	// matches only patternThinkingField, so it takes this fast path and is left
	// unconverted — confirm whether that is intended.
	containsThinkingBlocks := bytes.Contains(body, patternTypeThinking) ||
		bytes.Contains(body, patternTypeThinkingSpaced) ||
		bytes.Contains(body, patternTypeRedactedThinking) ||
		bytes.Contains(body, patternTypeRedactedSpaced) ||
		bytes.Contains(body, patternThinkingFieldSpaced)
	if !hasEmptyContent && !containsThinkingBlocks {
		if topThinking := gjson.Get(jsonStr, "thinking"); topThinking.Exists() {
			if out, err := sjson.DeleteBytes(body, "thinking"); err == nil {
				out = removeThinkingDependentContextStrategies(out)
				return out
			}
			return body
		}
		return body
	}

	var messages []any
	if err := json.Unmarshal(sliceRawFromBody(body, msgsRes), &messages); err != nil {
		return body
	}

	// modified records whether any message in messages was rewritten.
	modified := false

	// Disable top-level thinking mode for retry to avoid structural/signature constraints upstream.
	deleteTopLevelThinking := gjson.Get(jsonStr, "thinking").Exists()

	for i := 0; i < len(messages); i++ {
		msgMap, ok := messages[i].(map[string]any)
		if !ok {
			continue
		}

		role, _ := msgMap["role"].(string)
		content, ok := msgMap["content"].([]any)
		if !ok {
			// String content or other format - keep as is
			continue
		}

		// Lazy allocation: a new slice is built only once a block that needs
		// changing has been found. newContent == nil means "no change so far".
		var newContent []any
		modifiedThisMsg := false

		// ensureNewContent allocates newContent on first use and seeds it with the
		// prefixLen blocks that were already scanned and kept unchanged.
		ensureNewContent := func(prefixLen int) {
			if newContent != nil {
				return
			}
			newContent = make([]any, 0, len(content))
			if prefixLen > 0 {
				newContent = append(newContent, content[:prefixLen]...)
			}
		}

		for bi := 0; bi < len(content); bi++ {
			block := content[bi]
			blockMap, ok := block.(map[string]any)
			if !ok {
				// Non-object block: carry it over only if a rewrite is already in progress.
				if newContent != nil {
					newContent = append(newContent, block)
				}
				continue
			}

			blockType, _ := blockMap["type"].(string)

			// Convert thinking blocks to text (preserve content) and drop redacted_thinking.
			switch blockType {
			case "thinking":
				modifiedThisMsg = true
				ensureNewContent(bi)
				thinkingText, _ := blockMap["thinking"].(string)
				// A thinking block with empty or non-string text is dropped entirely.
				if thinkingText != "" {
					newContent = append(newContent, map[string]any{"type": "text", "text": thinkingText})
				}
				continue
			case "redacted_thinking":
				modifiedThisMsg = true
				ensureNewContent(bi)
				continue
			}

			// Handle blocks without type discriminator but with a "thinking" field.
			if blockType == "" {
				if rawThinking, hasThinking := blockMap["thinking"]; hasThinking {
					modifiedThisMsg = true
					ensureNewContent(bi)
					switch v := rawThinking.(type) {
					case string:
						if v != "" {
							newContent = append(newContent, map[string]any{"type": "text", "text": v})
						}
					default:
						// Non-string payload: keep it as its JSON encoding.
						if b, err := json.Marshal(v); err == nil && len(b) > 0 {
							newContent = append(newContent, map[string]any{"type": "text", "text": string(b)})
						}
					}
					continue
				}
			}

			if newContent != nil {
				newContent = append(newContent, block)
			}
		}

		// Handle empty content: either from filtering or originally empty
		if newContent == nil {
			// No block was changed; only an originally empty array needs a placeholder.
			if len(content) == 0 {
				modified = true
				placeholder := "(content removed)"
				if role == "assistant" {
					placeholder = "(assistant content removed)"
				}
				msgMap["content"] = []any{map[string]any{"type": "text", "text": placeholder}}
			}
			continue
		}

		// Every block was removed by the filtering above: insert a placeholder.
		if len(newContent) == 0 {
			modified = true
			placeholder := "(content removed)"
			if role == "assistant" {
				placeholder = "(assistant content removed)"
			}
			msgMap["content"] = []any{map[string]any{"type": "text", "text": placeholder}}
			continue
		}

		if modifiedThisMsg {
			modified = true
			msgMap["content"] = newContent
		}
	}

	if !modified && !deleteTopLevelThinking {
		// Avoid rewriting JSON when no changes are needed.
		return body
	}

	out := body
	if deleteTopLevelThinking {
		if b, err := sjson.DeleteBytes(out, "thinking"); err == nil {
			out = b
		} else {
			return body
		}
		// Removing "thinking" makes any context_management strategy that requires it invalid
		// (e.g. clear_thinking_20251015).  Strip those entries so the retry request does not
		// receive a 400 "strategy requires thinking to be enabled or adaptive".
		out = removeThinkingDependentContextStrategies(out)
	}
	if modified {
		// Only the messages sub-tree is re-encoded and spliced back into the body.
		msgsBytes, err := json.Marshal(messages)
		if err != nil {
			return body
		}
		out, err = sjson.SetRawBytes(out, "messages", msgsBytes)
		if err != nil {
			return body
		}
	}
	return out
}

// removeThinkingDependentContextStrategies drops from context_management.edits
// every strategy that only works while thinking is enabled (currently
// clear_thinking_20251015).
//
// It must be called whenever the top-level "thinking" field has been removed;
// otherwise the upstream answers with
// "strategy requires thinking to be enabled or adaptive".
//
// When the edits array ends up empty the "edits" key itself is deleted. The
// input body is returned as-is when there is no edits array, when nothing was
// removed, or when rewriting fails.
func removeThinkingDependentContextStrategies(body []byte) []byte {
	const (
		editsPath            = "context_management.edits"
		thinkingOnlyStrategy = "clear_thinking_20251015"
	)

	edits := gjson.Get(*(*string)(unsafe.Pointer(&body)), editsPath)
	if !(edits.Exists() && edits.IsArray()) {
		return body
	}

	// Collect the raw JSON of every edit that survives the filter.
	var kept []json.RawMessage
	dropped := false
	edits.ForEach(func(_, edit gjson.Result) bool {
		if edit.Get("type").String() != thinkingOnlyStrategy {
			kept = append(kept, json.RawMessage(edit.Raw))
		} else {
			dropped = true
		}
		return true
	})

	switch {
	case !dropped:
		return body
	case len(kept) == 0:
		// Nothing left: remove the key rather than leaving an empty array.
		trimmed, err := sjson.DeleteBytes(body, editsPath)
		if err != nil {
			return body
		}
		return trimmed
	}

	encoded, err := json.Marshal(kept)
	if err != nil {
		return body
	}
	rewritten, err := sjson.SetRawBytes(body, editsPath, encoded)
	if err != nil {
		return body
	}
	return rewritten
}

// FilterSignatureSensitiveBlocksForRetry is a stronger retry filter for cases where upstream errors indicate
// signature/thought_signature validation issues involving tool blocks.
//
// This performs everything in FilterThinkingBlocksForRetry, plus:
//   - Convert `tool_use` blocks to text (name/id/input) so we stop sending structured tool calls.
//   - Convert `tool_result` blocks to text so we keep tool results visible without tool semantics.
//
// Use this only when needed: converting tool blocks to text changes model behaviour and can increase the
// risk of prompt injection (tool output becomes plain conversation text).
//
// The original body is returned unchanged when it contains none of the relevant
// constructs, is not a single valid JSON value, has no "messages" array, or when
// nothing had to be rewritten.
//
// Numbers are decoded as json.Number so that re-encoding the body does not round
// large integers through float64, and tool payloads are rendered without HTML
// escaping so that characters such as '<', '>' and '&' reach the model verbatim
// instead of as \u003c-style escape sequences.
func FilterSignatureSensitiveBlocksForRetry(body []byte) []byte {
	// Fast path: only run when we see likely relevant constructs.
	if !bytes.Contains(body, []byte(`"type":"thinking"`)) &&
		!bytes.Contains(body, []byte(`"type": "thinking"`)) &&
		!bytes.Contains(body, []byte(`"type":"redacted_thinking"`)) &&
		!bytes.Contains(body, []byte(`"type": "redacted_thinking"`)) &&
		!bytes.Contains(body, []byte(`"type":"tool_use"`)) &&
		!bytes.Contains(body, []byte(`"type": "tool_use"`)) &&
		!bytes.Contains(body, []byte(`"type":"tool_result"`)) &&
		!bytes.Contains(body, []byte(`"type": "tool_result"`)) &&
		!bytes.Contains(body, []byte(`"thinking":`)) &&
		!bytes.Contains(body, []byte(`"thinking" :`)) {
		return body
	}

	// Decode with UseNumber: plain json.Unmarshal into map[string]any turns every
	// number into float64, which corrupts integers above 2^53 once re-marshaled.
	dec := json.NewDecoder(bytes.NewReader(body))
	dec.UseNumber()
	var req map[string]any
	if err := dec.Decode(&req); err != nil {
		return body
	}
	// A Decoder stops after the first value; keep json.Unmarshal's contract that
	// anything but JSON whitespace after it makes the body invalid.
	if len(bytes.Trim(body[dec.InputOffset():], " \t\r\n")) != 0 {
		return body
	}

	// encodeText renders v as compact JSON for embedding in a text block.
	// It returns "" when v cannot be encoded.
	encodeText := func(v any) string {
		var buf bytes.Buffer
		enc := json.NewEncoder(&buf)
		enc.SetEscapeHTML(false)
		if err := enc.Encode(v); err != nil {
			return ""
		}
		// Encode always appends a newline; drop it.
		return string(bytes.TrimSuffix(buf.Bytes(), []byte("\n")))
	}

	modified := false

	// Disable top-level thinking for retry to avoid structural/signature constraints upstream.
	if _, exists := req["thinking"]; exists {
		delete(req, "thinking")
		modified = true
		// Remove context_management strategies that require thinking to be enabled
		// (e.g. clear_thinking_20251015), otherwise upstream returns 400.
		if cm, ok := req["context_management"].(map[string]any); ok {
			if edits, ok := cm["edits"].([]any); ok {
				filtered := make([]any, 0, len(edits))
				for _, edit := range edits {
					if editMap, ok := edit.(map[string]any); ok {
						if editMap["type"] == "clear_thinking_20251015" {
							continue
						}
					}
					filtered = append(filtered, edit)
				}
				if len(filtered) != len(edits) {
					if len(filtered) == 0 {
						delete(cm, "edits")
					} else {
						cm["edits"] = filtered
					}
				}
			}
		}
	}

	messages, ok := req["messages"].([]any)
	if !ok {
		return body
	}

	newMessages := make([]any, 0, len(messages))

	for _, msg := range messages {
		msgMap, ok := msg.(map[string]any)
		if !ok {
			newMessages = append(newMessages, msg)
			continue
		}

		role, _ := msgMap["role"].(string)
		content, ok := msgMap["content"].([]any)
		if !ok {
			// String content or another shape: keep the message as is.
			newMessages = append(newMessages, msg)
			continue
		}

		newContent := make([]any, 0, len(content))
		modifiedThisMsg := false

		for _, block := range content {
			blockMap, ok := block.(map[string]any)
			if !ok {
				newContent = append(newContent, block)
				continue
			}

			blockType, _ := blockMap["type"].(string)
			switch blockType {
			case "thinking":
				// Keep the thinking text as a plain text block; drop it when empty.
				modifiedThisMsg = true
				thinkingText, _ := blockMap["thinking"].(string)
				if thinkingText == "" {
					continue
				}
				newContent = append(newContent, map[string]any{"type": "text", "text": thinkingText})
				continue
			case "redacted_thinking":
				// Cannot be expressed as text: remove.
				modifiedThisMsg = true
				continue
			case "tool_use":
				modifiedThisMsg = true
				name, _ := blockMap["name"].(string)
				id, _ := blockMap["id"].(string)
				inputJSON := encodeText(blockMap["input"])
				text := "(tool_use)"
				if name != "" {
					text += " name=" + name
				}
				if id != "" {
					text += " id=" + id
				}
				if inputJSON != "" && inputJSON != "null" {
					text += " input=" + inputJSON
				}
				newContent = append(newContent, map[string]any{"type": "text", "text": text})
				continue
			case "tool_result":
				modifiedThisMsg = true
				toolUseID, _ := blockMap["tool_use_id"].(string)
				isError, _ := blockMap["is_error"].(bool)
				resultJSON := encodeText(blockMap["content"])
				text := "(tool_result)"
				if toolUseID != "" {
					text += " tool_use_id=" + toolUseID
				}
				if isError {
					text += " is_error=true"
				}
				if resultJSON != "" && resultJSON != "null" {
					text += "\n" + resultJSON
				}
				newContent = append(newContent, map[string]any{"type": "text", "text": text})
				continue
			}

			// Blocks without a type discriminator but with a "thinking" field.
			if blockType == "" {
				if rawThinking, hasThinking := blockMap["thinking"]; hasThinking {
					modifiedThisMsg = true
					switch v := rawThinking.(type) {
					case string:
						if v != "" {
							newContent = append(newContent, map[string]any{"type": "text", "text": v})
						}
					default:
						// Non-string payload: keep it as its JSON encoding.
						if encoded := encodeText(v); encoded != "" {
							newContent = append(newContent, map[string]any{"type": "text", "text": encoded})
						}
					}
					continue
				}
			}

			newContent = append(newContent, block)
		}

		if modifiedThisMsg {
			modified = true
			// Never leave a message with an empty content array.
			if len(newContent) == 0 {
				placeholder := "(content removed)"
				if role == "assistant" {
					placeholder = "(assistant content removed)"
				}
				newContent = append(newContent, map[string]any{"type": "text", "text": placeholder})
			}
			msgMap["content"] = newContent
		}

		newMessages = append(newMessages, msgMap)
	}

	if !modified {
		return body
	}

	req["messages"] = newMessages
	newBody, err := json.Marshal(req)
	if err != nil {
		return body
	}
	return newBody
}

// filterThinkingBlocksInternal removes invalid thinking blocks from a request body.
//
// Strategy:
//   - When thinking.type is not "enabled"/"adaptive": remove all thinking-related blocks.
//   - When thinking.type is "enabled"/"adaptive": in assistant messages keep thinking /
//     redacted_thinking blocks whose signature is non-empty and not the dummy
//     signature; remove every other thinking-related block.
//
// The second parameter is unused. The original body is returned unchanged when
// it contains no thinking-related text, is not a single valid JSON value, has no
// "messages" array, or when nothing was filtered.
//
// Numbers are decoded as json.Number so that re-encoding the body does not round
// large integers through float64.
//
// NOTE(review): a message whose blocks are all removed is left with an empty
// content array; unlike the retry filters no placeholder is inserted here —
// confirm that the upstream accepts this.
func filterThinkingBlocksInternal(body []byte, _ bool) []byte {
	// Fast path: if body doesn't contain "thinking", skip parsing
	if !bytes.Contains(body, []byte(`"type":"thinking"`)) &&
		!bytes.Contains(body, []byte(`"type": "thinking"`)) &&
		!bytes.Contains(body, []byte(`"type":"redacted_thinking"`)) &&
		!bytes.Contains(body, []byte(`"type": "redacted_thinking"`)) &&
		!bytes.Contains(body, []byte(`"thinking":`)) &&
		!bytes.Contains(body, []byte(`"thinking" :`)) {
		return body
	}

	// Decode with UseNumber: plain json.Unmarshal into map[string]any turns every
	// number into float64, which corrupts integers above 2^53 once re-marshaled.
	dec := json.NewDecoder(bytes.NewReader(body))
	dec.UseNumber()
	var req map[string]any
	if err := dec.Decode(&req); err != nil {
		return body
	}
	// A Decoder stops after the first value; keep json.Unmarshal's contract that
	// anything but JSON whitespace after it makes the body invalid.
	if len(bytes.Trim(body[dec.InputOffset():], " \t\r\n")) != 0 {
		return body
	}

	// Check if thinking is enabled
	thinkingEnabled := false
	if thinking, ok := req["thinking"].(map[string]any); ok {
		if thinkType, ok := thinking["type"].(string); ok && (thinkType == "enabled" || thinkType == "adaptive") {
			thinkingEnabled = true
		}
	}

	messages, ok := req["messages"].([]any)
	if !ok {
		return body
	}

	filtered := false
	for _, msg := range messages {
		msgMap, ok := msg.(map[string]any)
		if !ok {
			continue
		}

		role, _ := msgMap["role"].(string)
		content, ok := msgMap["content"].([]any)
		if !ok {
			// String content or another shape: nothing to filter.
			continue
		}

		newContent := make([]any, 0, len(content))
		filteredThisMessage := false

		for _, block := range content {
			blockMap, ok := block.(map[string]any)
			if !ok {
				newContent = append(newContent, block)
				continue
			}

			blockType, _ := blockMap["type"].(string)

			if blockType == "thinking" || blockType == "redacted_thinking" {
				// When thinking is enabled and this is an assistant message,
				// only keep thinking blocks with valid signatures
				if thinkingEnabled && role == "assistant" {
					signature, _ := blockMap["signature"].(string)
					if signature != "" && signature != antigravity.DummyThoughtSignature {
						newContent = append(newContent, block)
						continue
					}
				}
				filtered = true
				filteredThisMessage = true
				continue
			}

			// Handle blocks without type discriminator but with "thinking" key
			if blockType == "" {
				if _, hasThinking := blockMap["thinking"]; hasThinking {
					filtered = true
					filteredThisMessage = true
					continue
				}
			}

			newContent = append(newContent, block)
		}

		// Replace the content array only when something was actually removed.
		if filteredThisMessage {
			msgMap["content"] = newContent
		}
	}

	if !filtered {
		return body
	}

	newBody, err := json.Marshal(req)
	if err != nil {
		return body
	}
	return newBody
}

// =========================
// Thinking Budget Rectifier
// =========================

const (
	// BudgetRectifyBudgetTokens is the thinking.budget_tokens value written by
	// RectifyThinkingBudget when rectifying.
	BudgetRectifyBudgetTokens = 32000
	// BudgetRectifyMaxTokens is the max_tokens value written by RectifyThinkingBudget
	// when the current max_tokens is too small.
	BudgetRectifyMaxTokens = 64000
	// BudgetRectifyMinMaxTokens is the smallest max_tokens that RectifyThinkingBudget
	// leaves untouched; it is one more than BudgetRectifyBudgetTokens so that
	// max_tokens exceeds budget_tokens.
	BudgetRectifyMinMaxTokens = 32001
)

// isThinkingBudgetConstraintError reports whether an upstream error message
// describes a budget_tokens constraint violation (e.g. "budget_tokens >= 1024").
//
// The match is case-insensitive and requires all of:
//  1. "budget_tokens" or "budget tokens"
//  2. "thinking"
//  3. a constraint marker: ">= 1024", "greater than or equal to 1024", or
//     both "1024" and "input should be"
func isThinkingBudgetConstraintError(errMsg string) bool {
	lowered := strings.ToLower(errMsg)
	has := func(fragment string) bool {
		return strings.Contains(lowered, fragment)
	}

	// Conditions 1 and 2: the message must be about the thinking budget.
	mentionsBudget := has("budget_tokens") || has("budget tokens")
	if !mentionsBudget || !has("thinking") {
		return false
	}

	// Condition 3: one of the accepted constraint markers.
	switch {
	case has(">= 1024"), has("greater than or equal to 1024"):
		return true
	default:
		return has("1024") && has("input should be")
	}
}

// RectifyThinkingBudget rewrites a request body so that it no longer violates
// the budget_tokens constraint.
//
// Unless thinking.type is "adaptive" (in which case the body is returned
// untouched), it applies, in order:
//   - thinking.type = "enabled" when it is anything else
//   - thinking.budget_tokens = BudgetRectifyBudgetTokens when it differs
//   - max_tokens = BudgetRectifyMaxTokens when it is below BudgetRectifyMinMaxTokens
//
// It returns the resulting body and true when at least one field was written,
// or the original body and false otherwise. A field whose write fails is skipped.
func RectifyThinkingBudget(body []byte) ([]byte, bool) {
	mode := gjson.GetBytes(body, "thinking.type").String()
	if mode == "adaptive" {
		// Adaptive thinking is left entirely alone.
		return body, false
	}

	out, touched := body, false
	// set writes value at path into out; a failed write leaves out as it was.
	set := func(path string, value any) {
		next, err := sjson.SetBytes(out, path, value)
		if err != nil {
			return
		}
		out, touched = next, true
	}

	if mode != "enabled" {
		set("thinking.type", "enabled")
	}

	// Each check reads from out so that it sees the writes made before it.
	if gjson.GetBytes(out, "thinking.budget_tokens").Int() != BudgetRectifyBudgetTokens {
		set("thinking.budget_tokens", BudgetRectifyBudgetTokens)
	}

	if gjson.GetBytes(out, "max_tokens").Int() < int64(BudgetRectifyMinMaxTokens) {
		set("max_tokens", BudgetRectifyMaxTokens)
	}

	return out, touched
}
-												perf(后端): 完成性能优化与连接池配置

新增 DB/Redis 连接池配置与校验，并补充单测

网关请求体大小限制与 413 处理

HTTP/req 客户端池化并调整上游连接池默认值

并发槽位改为 ZSET+Lua 与指数退避

用量统计改 SQL 聚合并新增索引迁移

计费缓存写入改工作池并补测试/基准

测试: 在 backend/ 下运行 go test ./...

											
										
										
											2025-12-31 08:50:12 +08:00
+								package service
 								import (
-												Fix/multiple issues (#24)

* fix(gemini): 修复 google_one OAuth 配置和 scopes 问题

- 修复 google_one 类型在 ExchangeCode 和 RefreshToken 中使用内置客户端
- 添加 DefaultGoogleOneScopes，包含 generative-language 和 drive.readonly 权限
- 在 EffectiveOAuthConfig 中为 google_one 类型使用专门的 scopes
- 将 docker-compose.override.yml 重命名为 .example 并添加到 .gitignore
- 完善 docker-compose.override.yml.example 示例文档

解决问题：
1. google_one OAuth 授权后 API 调用返回 403 权限不足
2. 缺少访问 Gemini API 所需的 generative-language scope
3. 缺少获取 Drive 存储配额所需的 drive.readonly scope

* fix(antigravity): 完全跳过 Claude 模型的所有 thinking 块

问题分析：
- 当前代码尝试保留有 signature 的 thinking 块
- 但 Vertex AI 的 signature 是完整性令牌，无法在本地验证
- 导致 400 错误：Invalid signature in thinking block

根本原因：
1. thinking 功能已对非 Gemini 模型禁用 (isThinkingEnabled=false)
2. Vertex AI 要求原样重放 (thinking, signature) 对或完全不发送
3. 本地无法复制 Vertex 的加密验证逻辑

修复方案：
- 对 Claude 模型完全跳过所有 thinking 块（无论是否有 signature）
- 保持 Gemini 模型使用 dummy signature 的行为不变
- 更新测试用例以反映新的预期行为

影响：
- 消除 thinking 相关的 400 错误
- 与现有的 thinking 禁用策略保持一致
- 不影响 Gemini 模型的 thinking 功能

测试：
- ✅ TestBuildParts_ThinkingBlockWithoutSignature 全部通过
- ✅ TestBuildTools_CustomTypeTools 全部通过

参考：Codex review 建议

* fix(gateway): 修复 count_tokens 端点 400 错误

问题分析：
- count_tokens 请求包含 thinking 块时返回 400 错误
- 原因：thinking 块未被过滤，直接转发到上游 API
- 上游 API 拒绝无效的 thinking signature

根本原因：
1. /v1/messages 请求通过 TransformClaudeToGemini 过滤 thinking 块
2. count_tokens 请求绕过转换，直接转发原始请求体
3. 导致包含无效 signature 的 thinking 块被发送到上游

修复方案：
- 创建 FilterThinkingBlocks 工具函数
- 在 buildCountTokensRequest 中应用过滤（1 行修改）
- 与 /v1/messages 行为保持一致

实现细节：
- FilterThinkingBlocks: 解析 JSON，过滤 thinking 块，重新序列化
- 失败安全：解析/序列化失败时返回原始请求体
- 性能优化：仅在发现 thinking 块时重新序列化

测试：
- ✅ 6 个单元测试全部通过
- ✅ 覆盖正常过滤、无 thinking 块、无效 JSON 等场景
- ✅ 现有测试不受影响

影响：
- 消除 count_tokens 的 400 错误
- 不影响 Antigravity 账号（仍返回模拟响应）
- 适用于所有账号类型（OAuth、API Key）

文件修改：
- backend/internal/service/gateway_request.go: +62 行（新函数）
- backend/internal/service/gateway_service.go: +2 行（应用过滤）
- backend/internal/service/gateway_request_test.go: +62 行（测试）

* fix(gateway): 增强 thinking 块过滤逻辑

基于 Codex 分析和建议的改进：

问题分析：
- 新错误：signature: Field required（signature 字段缺失）
- 旧错误：Invalid signature（signature 存在但无效）
- 两者都说明 thinking 块在请求中是危险的

Codex 建议：
- 保持 Option A：完全跳过所有 thinking 块
- 原因：thinking 块应该是只输出的，除非有服务端来源证明
- 在无状态代理中，无法安全区分上游来源 vs 客户端注入

改进内容：

1. 增强 FilterThinkingBlocks 函数
   - 过滤显式的 thinking 块：{"type":"thinking", ...}
   - 过滤无 type 的 thinking 对象：{"thinking": {...}}
   - 保留 tool_use 等其他类型块中的 thinking 字段
   - 修复：只在实际过滤时更新 content 数组

2. 扩展过滤范围
   - 将 FilterThinkingBlocks 应用到 /v1/messages 主路径
   - 之前只应用于 count_tokens，现在两个端点都过滤
   - 防止所有端点的 thinking 相关 400 错误

3. 改进测试
   - 新增：过滤无 type discriminator 的 thinking 块
   - 新增：不过滤 tool_use 中的 thinking 字段
   - 使用 containsThinkingBlock 辅助函数验证

测试：
- ✅ 8 个测试用例全部通过
- ✅ 覆盖各种 thinking 块格式
- ✅ 确保不误伤其他类型的块

影响：
- 消除 signature required 和 invalid signature 错误
- 统一 /v1/messages 和 count_tokens 的行为
- 更健壮的 thinking 块检测逻辑

参考：Codex review 和代码改进

* refactor: 根据 Codex 审查建议进行代码优化

基于 Codex 代码审查的 P1 和 P2 改进：

P1 改进（重要问题）：

1. 优化日志输出
   - 移除 thinking 块跳过时的 log.Printf
   - 避免高频请求下的日志噪音
   - 添加注释说明可通过指标监控

2. 清理遗留代码
   - 删除未使用的 isValidThoughtSignature 函数（27行）
   - 该函数在改为完全跳过 thinking 块后不再需要

P2 改进（性能优化）：

3. 添加快速路径检查
   - 在 FilterThinkingBlocks 中添加 bytes.Contains 预检查
   - 如果请求体不包含 "thinking" 字符串，直接返回
   - 避免不必要的 JSON 解析，提升性能

技术细节：
- request_transformer.go: -27行（删除函数），+1行（优化注释）
- gateway_request.go: +5行（快速路径 + bytes 导入）

测试：
- ✅ TestBuildParts_ThinkingBlockWithoutSignature 全部通过
- ✅ TestFilterThinkingBlocks 全部通过（8个测试用例）

影响：
- 减少日志噪音
- 提升性能（快速路径）
- 代码更简洁（删除未使用代码）

参考：Codex 代码审查建议

* fix: 修复 golangci-lint 检查问题

- 格式化 gateway_request_test.go
- 使用 switch 语句替代 if-else 链（staticcheck QF1003）

* fix(antigravity): 修复 thinking signature 处理并实现 Auto 模式降级

问题分析：
1. 原先代码错误地禁用了 Claude via Vertex 的 thinkingConfig
2. 历史 thinking 块的 signature 被完全跳过，导致验证失败
3. 跨模型混用时 dummy signature 会导致 400 错误

修复内容：

**request_transformer.go**：
- 删除第 38-43 行的错误逻辑（禁用 thinkingConfig）
- 引入 thoughtSignatureMode（Preserve/Dummy）策略
- Claude 模式：透传真实 signature，过滤空/dummy
- Gemini 模式：使用 dummy signature
- 支持 signature-only thinking 块
- tool_use 的 signature 也透传

**antigravity_gateway_service.go**：
- 新增 isSignatureRelatedError() 检测 signature 相关错误
- 新增 stripThinkingFromClaudeRequest() 移除 thinking 块
- 实现 Auto 模式：检测 400 + signature 关键词时自动降级重试
- 重试时完全移除 thinking 配置和消息中的 thinking 块
- 最多重试一次，避免循环

**测试**：
- 更新并新增测试覆盖 Claude preserve/Gemini dummy 模式
- 新增 tool_use signature 处理测试
- 所有测试通过（6/6）

影响：
- ✅ Claude via Vertex 可以正常使用 thinking 功能
- ✅ 历史 signature 正确透传，避免验证失败
- ✅ 跨模型混用时自动过滤无效 signature
- ✅ 错误驱动降级，自动修复 signature 问题
- ✅ 不影响纯 Claude API 和其他渠道

参考：Codex 深度分析和实现建议

* fix(lint): 修复 gofmt 格式问题

* fix(antigravity): 修复 stripThinkingFromClaudeRequest 遗漏 untyped thinking blocks

问题：
- Codex 审查指出 stripThinkingFromClaudeRequest 只移除了 type="thinking" 的块
- 没有处理没有 type 字段的 thinking 对象（如 {"thinking": "...", "signature": "..."}）
- 导致重试时仍包含无效 thinking 块，上游 400 错误持续

修复：
- 添加检查：跳过没有 type 但有 thinking 字段的块
- 现在会移除两种格式：
  1. {"type": "thinking", "thinking": "...", "signature": "..."}
  2. {"thinking": "...", "signature": "..."}（untyped）

测试：所有测试通过

参考：Codex P1 审查意见
											
										
										
											2026-01-02 17:47:49 +08:00
+									"bytes"
-												perf(后端): 完成性能优化与连接池配置

新增 DB/Redis 连接池配置与校验，并补充单测

网关请求体大小限制与 413 处理

HTTP/req 客户端池化并调整上游连接池默认值

并发槽位改为 ZSET+Lua 与指数退避

用量统计改 SQL 聚合并新增索引迁移

计费缓存写入改工作池并补测试/基准

测试: 在 backend/ 下运行 go test ./...

											
										
										
											2025-12-31 08:50:12 +08:00
+									"encoding/json"
 									"fmt"
-												fix: 收敛 Claude Code 探测拦截并补齐回归测试

											
										
										
											2026-02-07 19:04:08 +08:00
+									"math"
-												feat: 支持后台设置是否启用整流开关

											
										
										
											2026-03-07 21:45:18 +08:00
+									"strings"
-												fix(gateway): 优化 ParseGatewayRequest 函数，使用 unsafe 提高性能并增加 JSON 校验

											
										
										
											2026-02-10 22:12:24 +08:00
+									"unsafe"
-												feat(antigravity): comprehensive enhancements - model mapping, rate limiting, scheduling & ops

Key changes:
- Upgrade model mapping: Opus 4.5 → Opus 4.6-thinking with precise matching
- Unified rate limiting: scope-level → model-level with Redis snapshot sync
- Load-balanced scheduling by call count with smart retry mechanism
- Force cache billing support
- Model identity injection in prompts with leak prevention
- Thinking mode auto-handling (max_tokens/budget_tokens fix)
- Frontend: whitelist mode toggle, model mapping validation, status indicators
- Gemini session fallback with Redis Trie O(L) matching
- Ops: enhanced concurrency monitoring, account availability, retry logic
- Migration scripts: 049-051 for model mapping unification

											
										
										
											2026-02-07 12:31:10 +08:00
-												fix: parse Gemini native request format in ParseGatewayRequest for correct session hash generation

ParseGatewayRequest only parsed Anthropic format (system/messages),
ignoring Gemini native format (systemInstruction/contents). This caused
GenerateSessionHash to produce identical hashes for all Gemini sessions.

Add protocol parameter to ParseGatewayRequest to branch between
Anthropic and Gemini parsing. Update GenerateSessionHash message
traversal to extract text from both formats.

											
										
										
											2026-02-09 06:47:22 +08:00
+									"github.com/Wei-Shaw/sub2api/internal/domain"
-												feat(antigravity): comprehensive enhancements - model mapping, rate limiting, scheduling & ops

Key changes:
- Upgrade model mapping: Opus 4.5 → Opus 4.6-thinking with precise matching
- Unified rate limiting: scope-level → model-level with Redis snapshot sync
- Load-balanced scheduling by call count with smart retry mechanism
- Force cache billing support
- Model identity injection in prompts with leak prevention
- Thinking mode auto-handling (max_tokens/budget_tokens fix)
- Frontend: whitelist mode toggle, model mapping validation, status indicators
- Gemini session fallback with Redis Trie O(L) matching
- Ops: enhanced concurrency monitoring, account availability, retry logic
- Migration scripts: 049-051 for model mapping unification

											
										
										
											2026-02-07 12:31:10 +08:00
+									"github.com/Wei-Shaw/sub2api/internal/pkg/antigravity"
-												perf(backend): 使用 gjson/sjson 优化热路径 JSON 处理

将 API 网关热路径中的 json.Unmarshal+json.Marshal 替换为 gjson 零拷贝查询和 sjson 精准写入：
- unwrapV1InternalResponse 性能提升 22x（4009ns→182ns），内存分配减少 28.5x
- unwrapGeminiResponse、extractGeminiUsage、estimateGeminiCountTokens、ParseGeminiRateLimitResetTime 改为接收 []byte 使用 gjson 提取
- ParseGatewayRequest 的 model/stream/metadata/thinking/max_tokens 改用 gjson 类型安全提取
- Handler 层（sora/openai）改用 gjson 提取字段、sjson 注入/修改字段，移除 map[string]any 中间变量
- Sora Client 响应解析改用 gjson ForEach 遍历，减少内存分配
- 新增约 100 个单元测试用例，所有改动函数覆盖率 >85%

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

											
										
										
											2026-02-10 08:59:30 +08:00
+									"github.com/tidwall/gjson"
-												perf(service): 优化重试场景 thinking 过滤性能

- 避免全量 Unmarshal 请求体，改为仅解析 messages 子树

- 顶层 thinking 使用 sjson 直接删除，减少整体重写

- content 仅在需要修改时延迟分配 new slice

- 增加 FilterThinkingBlocksForRetry 基准测试

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

											
										
										
											2026-02-11 00:47:26 +08:00
+									"github.com/tidwall/sjson"
 								)
 								var (
 									// 这些字节模式用于 fast-path 判断，避免每次 []byte("...") 产生临时分配。
 									patternTypeThinking         = []byte(`"type":"thinking"`)
 									patternTypeThinkingSpaced   = []byte(`"type": "thinking"`)
 									patternTypeRedactedThinking = []byte(`"type":"redacted_thinking"`)
 									patternTypeRedactedSpaced   = []byte(`"type": "redacted_thinking"`)
 									patternThinkingField       = []byte(`"thinking":`)
 									patternThinkingFieldSpaced = []byte(`"thinking" :`)
 									patternEmptyContent       = []byte(`"content":[]`)
 									patternEmptyContentSpaced = []byte(`"content": []`)
 									patternEmptyContentSp1    = []byte(`"content" : []`)
 									patternEmptyContentSp2    = []byte(`"content" :[]`)
-												perf(后端): 完成性能优化与连接池配置

新增 DB/Redis 连接池配置与校验，并补充单测

网关请求体大小限制与 413 处理

HTTP/req 客户端池化并调整上游连接池默认值

并发槽位改为 ZSET+Lua 与指数退避

用量统计改 SQL 聚合并新增索引迁移

计费缓存写入改工作池并补测试/基准

测试: 在 backend/ 下运行 go test ./...

											
										
										
											2025-12-31 08:50:12 +08:00
+								)
-												fix: prevent sessionHash collision for different users with same messages

Mix SessionContext (ClientIP, UserAgent, APIKeyID) into
GenerateSessionHash 3rd-level fallback to differentiate requests
from different users sending identical content.

Also switch hashContent from SHA256-truncated to XXHash64 for
better performance, and optimize Trie Lua script to match from
longest prefix first.

											
										
										
											2026-02-09 06:46:32 +08:00
// SessionContext is the sticky-session context used to tell requests from
// different origins apart.
//
// It is mixed in only at the third-level fallback of GenerateSessionHash
// (the message-content hash), so that different users sending identical
// messages do not end up with the same hash and pile onto one account.
type SessionContext struct {
	ClientIP  string
	UserAgent string
	APIKeyID  int64
}
-												perf(后端): 完成性能优化与连接池配置

新增 DB/Redis 连接池配置与校验，并补充单测

网关请求体大小限制与 413 处理

HTTP/req 客户端池化并调整上游连接池默认值

并发槽位改为 ZSET+Lua 与指数退避

用量统计改 SQL 聚合并新增索引迁移

计费缓存写入改工作池并补测试/基准

测试: 在 backend/ 下运行 go test ./...

											
										
										
											2025-12-31 08:50:12 +08:00
+								// ParsedRequest 保存网关请求的预解析结果
 								//
 								// 性能优化说明：
 								// 原实现在多个位置重复解析请求体（Handler、Service 各解析一次）：
 								// 1. gateway_handler.go 解析获取 model 和 stream
 								// 2. gateway_service.go 再次解析获取 system、messages、metadata
 								// 3. GenerateSessionHash 又一次解析获取会话哈希所需字段
 								//
 								// 新实现一次解析，多处复用：
 								// 1. 在 Handler 层统一调用 ParseGatewayRequest 一次性解析
 								// 2. 将解析结果 ParsedRequest 传递给 Service 层
 								// 3. 避免重复 json.Unmarshal，减少 CPU 和内存开销
 								type ParsedRequest struct {
-												fix: prevent sessionHash collision for different users with same messages

Mix SessionContext (ClientIP, UserAgent, APIKeyID) into
GenerateSessionHash 3rd-level fallback to differentiate requests
from different users sending identical content.

Also switch hashContent from SHA256-truncated to XXHash64 for
better performance, and optimize Trie Lua script to match from
longest prefix first.

											
										
										
											2026-02-09 06:46:32 +08:00
+									Body            []byte          // 原始请求体（保留用于转发）
 									Model           string          // 请求的模型名称
 									Stream          bool            // 是否为流式请求
 									MetadataUserID  string          // metadata.user_id（用于会话亲和）
 									System          any             // system 字段内容
 									Messages        []any           // messages 数组
 									HasSystem       bool            // 是否包含 system 字段（包含 null 也视为显式传入）
 									ThinkingEnabled bool            // 是否开启 thinking（部分平台会影响最终模型名）
 									MaxTokens       int             // max_tokens 值（用于探测请求拦截）
 									SessionContext  *SessionContext // 可选：请求上下文区分因子（nil 时行为不变）
-												feat(gateway): 双模式用户消息队列 — 串行队列 + 软性限速

新增 UMQ (User Message Queue) 双模式支持:
- serialize: 账号级分布式串行锁 + RPM 自适应延迟（严格限流）
- throttle: 仅 RPM 自适应前置延迟，不阻塞并发（软性限速）

后端:
- config: 新增 Mode 字段，保留 Enabled 向后兼容
- service: 新增 UserMessageQueueService（Lua 锁/延迟算法/清理 worker）
- repository: 新增 UserMsgQueueCache（Redis Lua acquire/release/force-release）
- handler: 新增 UserMsgQueueHelper（SSE ping + 等待循环 + throttle）
- gateway: 按 mode 分支集成 serialize/throttle 逻辑
- lint: 修复 gofmt rewrite rules、errcheck 类型断言、staticcheck QF1012

前端:
- 三态选择器 UI（关闭/软性限速/串行队列）替代 toggle 开关
- BulkEdit 支持 null 语义（不修改）
- i18n 中英文文案

通过 6 轮专家评审（42 次 review）、golangci-lint、单元测试、集成测试。

											
										
										
											2026-03-03 01:02:39 +08:00
 									// OnUpstreamAccepted 上游接受请求后立即调用（用于提前释放串行锁）
 									// 流式请求在收到 2xx 响应头后调用，避免持锁等流完成
 									OnUpstreamAccepted func()
-												perf(后端): 完成性能优化与连接池配置

新增 DB/Redis 连接池配置与校验，并补充单测

网关请求体大小限制与 413 处理

HTTP/req 客户端池化并调整上游连接池默认值

并发槽位改为 ZSET+Lua 与指数退避

用量统计改 SQL 聚合并新增索引迁移

计费缓存写入改工作池并补测试/基准

测试: 在 backend/ 下运行 go test ./...

											
										
										
											2025-12-31 08:50:12 +08:00
+								}
-												fix: parse Gemini native request format in ParseGatewayRequest for correct session hash generation

ParseGatewayRequest only parsed Anthropic format (system/messages),
ignoring Gemini native format (systemInstruction/contents). This caused
GenerateSessionHash to produce identical hashes for all Gemini sessions.

Add protocol parameter to ParseGatewayRequest to branch between
Anthropic and Gemini parsing. Update GenerateSessionHash message
traversal to extract text from both formats.

											
										
										
											2026-02-09 06:47:22 +08:00
+								// ParseGatewayRequest 解析网关请求体并返回结构化结果。
 								// protocol 指定请求协议格式（domain.PlatformAnthropic / domain.PlatformGemini），
 								// 不同协议使用不同的 system/messages 字段名。
 								func ParseGatewayRequest(body []byte, protocol string) (*ParsedRequest, error) {
-												fix(gateway): 优化 ParseGatewayRequest 函数，使用 unsafe 提高性能并增加 JSON 校验

											
										
										
											2026-02-10 22:12:24 +08:00
+									// 保持与旧实现一致：请求体必须是合法 JSON。
 									// 注意：gjson.GetBytes 对非法 JSON 不会报错，因此需要显式校验。
 									if !gjson.ValidBytes(body) {
 										return nil, fmt.Errorf("invalid json")
 									}
 									// 性能：
 									// - gjson.GetBytes 会把匹配的 Raw/Str 安全复制成 string（对于巨大 messages 会产生额外拷贝）。
 									// - 这里将 body 通过 unsafe 零拷贝视为 string，仅在本函数内使用，且 body 不会被修改。
 									jsonStr := *(*string)(unsafe.Pointer(&body))
-												perf(后端): 完成性能优化与连接池配置

新增 DB/Redis 连接池配置与校验，并补充单测

网关请求体大小限制与 413 处理

HTTP/req 客户端池化并调整上游连接池默认值

并发槽位改为 ZSET+Lua 与指数退避

用量统计改 SQL 聚合并新增索引迁移

计费缓存写入改工作池并补测试/基准

测试: 在 backend/ 下运行 go test ./...

											
										
										
											2025-12-31 08:50:12 +08:00
+									parsed := &ParsedRequest{
 										Body: body,
 									}
-												perf(backend): 使用 gjson/sjson 优化热路径 JSON 处理

将 API 网关热路径中的 json.Unmarshal+json.Marshal 替换为 gjson 零拷贝查询和 sjson 精准写入：
- unwrapV1InternalResponse 性能提升 22x（4009ns→182ns），内存分配减少 28.5x
- unwrapGeminiResponse、extractGeminiUsage、estimateGeminiCountTokens、ParseGeminiRateLimitResetTime 改为接收 []byte 使用 gjson 提取
- ParseGatewayRequest 的 model/stream/metadata/thinking/max_tokens 改用 gjson 类型安全提取
- Handler 层（sora/openai）改用 gjson 提取字段、sjson 注入/修改字段，移除 map[string]any 中间变量
- Sora Client 响应解析改用 gjson ForEach 遍历，减少内存分配
- 新增约 100 个单元测试用例，所有改动函数覆盖率 >85%

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

											
										
										
											2026-02-10 08:59:30 +08:00
+									// --- gjson 提取简单字段（避免完整 Unmarshal） ---
 									// model: 需要严格类型校验，非 string 返回错误
-												fix(gateway): 优化 ParseGatewayRequest 函数，使用 unsafe 提高性能并增加 JSON 校验

											
										
										
											2026-02-10 22:12:24 +08:00
+									modelResult := gjson.Get(jsonStr, "model")
-												perf(backend): 使用 gjson/sjson 优化热路径 JSON 处理

将 API 网关热路径中的 json.Unmarshal+json.Marshal 替换为 gjson 零拷贝查询和 sjson 精准写入：
- unwrapV1InternalResponse 性能提升 22x（4009ns→182ns），内存分配减少 28.5x
- unwrapGeminiResponse、extractGeminiUsage、estimateGeminiCountTokens、ParseGeminiRateLimitResetTime 改为接收 []byte 使用 gjson 提取
- ParseGatewayRequest 的 model/stream/metadata/thinking/max_tokens 改用 gjson 类型安全提取
- Handler 层（sora/openai）改用 gjson 提取字段、sjson 注入/修改字段，移除 map[string]any 中间变量
- Sora Client 响应解析改用 gjson ForEach 遍历，减少内存分配
- 新增约 100 个单元测试用例，所有改动函数覆盖率 >85%

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

											
										
										
											2026-02-10 08:59:30 +08:00
+									if modelResult.Exists() {
 										if modelResult.Type != gjson.String {
-												perf(后端): 完成性能优化与连接池配置

新增 DB/Redis 连接池配置与校验，并补充单测

网关请求体大小限制与 413 处理

HTTP/req 客户端池化并调整上游连接池默认值

并发槽位改为 ZSET+Lua 与指数退避

用量统计改 SQL 聚合并新增索引迁移

计费缓存写入改工作池并补测试/基准

测试: 在 backend/ 下运行 go test ./...

											
										
										
											2025-12-31 08:50:12 +08:00
+											return nil, fmt.Errorf("invalid model field type")
 										}
-												perf(backend): 使用 gjson/sjson 优化热路径 JSON 处理

将 API 网关热路径中的 json.Unmarshal+json.Marshal 替换为 gjson 零拷贝查询和 sjson 精准写入：
- unwrapV1InternalResponse 性能提升 22x（4009ns→182ns），内存分配减少 28.5x
- unwrapGeminiResponse、extractGeminiUsage、estimateGeminiCountTokens、ParseGeminiRateLimitResetTime 改为接收 []byte 使用 gjson 提取
- ParseGatewayRequest 的 model/stream/metadata/thinking/max_tokens 改用 gjson 类型安全提取
- Handler 层（sora/openai）改用 gjson 提取字段、sjson 注入/修改字段，移除 map[string]any 中间变量
- Sora Client 响应解析改用 gjson ForEach 遍历，减少内存分配
- 新增约 100 个单元测试用例，所有改动函数覆盖率 >85%

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

											
										
										
											2026-02-10 08:59:30 +08:00
+										parsed.Model = modelResult.String()
-												perf(后端): 完成性能优化与连接池配置

新增 DB/Redis 连接池配置与校验，并补充单测

网关请求体大小限制与 413 处理

HTTP/req 客户端池化并调整上游连接池默认值

并发槽位改为 ZSET+Lua 与指数退避

用量统计改 SQL 聚合并新增索引迁移

计费缓存写入改工作池并补测试/基准

测试: 在 backend/ 下运行 go test ./...

											
										
										
											2025-12-31 08:50:12 +08:00
+									}
-												perf(backend): 使用 gjson/sjson 优化热路径 JSON 处理

将 API 网关热路径中的 json.Unmarshal+json.Marshal 替换为 gjson 零拷贝查询和 sjson 精准写入：
- unwrapV1InternalResponse 性能提升 22x（4009ns→182ns），内存分配减少 28.5x
- unwrapGeminiResponse、extractGeminiUsage、estimateGeminiCountTokens、ParseGeminiRateLimitResetTime 改为接收 []byte 使用 gjson 提取
- ParseGatewayRequest 的 model/stream/metadata/thinking/max_tokens 改用 gjson 类型安全提取
- Handler 层（sora/openai）改用 gjson 提取字段、sjson 注入/修改字段，移除 map[string]any 中间变量
- Sora Client 响应解析改用 gjson ForEach 遍历，减少内存分配
- 新增约 100 个单元测试用例，所有改动函数覆盖率 >85%

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

											
										
										
											2026-02-10 08:59:30 +08:00
 									// stream: 需要严格类型校验，非 bool 返回错误
-												fix(gateway): 优化 ParseGatewayRequest 函数，使用 unsafe 提高性能并增加 JSON 校验

											
										
										
											2026-02-10 22:12:24 +08:00
+									streamResult := gjson.Get(jsonStr, "stream")
-												perf(backend): 使用 gjson/sjson 优化热路径 JSON 处理

将 API 网关热路径中的 json.Unmarshal+json.Marshal 替换为 gjson 零拷贝查询和 sjson 精准写入：
- unwrapV1InternalResponse 性能提升 22x（4009ns→182ns），内存分配减少 28.5x
- unwrapGeminiResponse、extractGeminiUsage、estimateGeminiCountTokens、ParseGeminiRateLimitResetTime 改为接收 []byte 使用 gjson 提取
- ParseGatewayRequest 的 model/stream/metadata/thinking/max_tokens 改用 gjson 类型安全提取
- Handler 层（sora/openai）改用 gjson 提取字段、sjson 注入/修改字段，移除 map[string]any 中间变量
- Sora Client 响应解析改用 gjson ForEach 遍历，减少内存分配
- 新增约 100 个单元测试用例，所有改动函数覆盖率 >85%

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

											
										
										
											2026-02-10 08:59:30 +08:00
+									if streamResult.Exists() {
 										if streamResult.Type != gjson.True && streamResult.Type != gjson.False {
-												perf(后端): 完成性能优化与连接池配置

新增 DB/Redis 连接池配置与校验，并补充单测

网关请求体大小限制与 413 处理

HTTP/req 客户端池化并调整上游连接池默认值

并发槽位改为 ZSET+Lua 与指数退避

用量统计改 SQL 聚合并新增索引迁移

计费缓存写入改工作池并补测试/基准

测试: 在 backend/ 下运行 go test ./...

											
										
										
											2025-12-31 08:50:12 +08:00
+											return nil, fmt.Errorf("invalid stream field type")
 										}
-												perf(backend): 使用 gjson/sjson 优化热路径 JSON 处理

将 API 网关热路径中的 json.Unmarshal+json.Marshal 替换为 gjson 零拷贝查询和 sjson 精准写入：
- unwrapV1InternalResponse 性能提升 22x（4009ns→182ns），内存分配减少 28.5x
- unwrapGeminiResponse、extractGeminiUsage、estimateGeminiCountTokens、ParseGeminiRateLimitResetTime 改为接收 []byte 使用 gjson 提取
- ParseGatewayRequest 的 model/stream/metadata/thinking/max_tokens 改用 gjson 类型安全提取
- Handler 层（sora/openai）改用 gjson 提取字段、sjson 注入/修改字段，移除 map[string]any 中间变量
- Sora Client 响应解析改用 gjson ForEach 遍历，减少内存分配
- 新增约 100 个单元测试用例，所有改动函数覆盖率 >85%

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

											
										
										
											2026-02-10 08:59:30 +08:00
+										parsed.Stream = streamResult.Bool()
-												perf(后端): 完成性能优化与连接池配置

新增 DB/Redis 连接池配置与校验，并补充单测

网关请求体大小限制与 413 处理

HTTP/req 客户端池化并调整上游连接池默认值

并发槽位改为 ZSET+Lua 与指数退避

用量统计改 SQL 聚合并新增索引迁移

计费缓存写入改工作池并补测试/基准

测试: 在 backend/ 下运行 go test ./...

											
										
										
											2025-12-31 08:50:12 +08:00
+									}
-												perf(backend): 使用 gjson/sjson 优化热路径 JSON 处理

将 API 网关热路径中的 json.Unmarshal+json.Marshal 替换为 gjson 零拷贝查询和 sjson 精准写入：
- unwrapV1InternalResponse 性能提升 22x（4009ns→182ns），内存分配减少 28.5x
- unwrapGeminiResponse、extractGeminiUsage、estimateGeminiCountTokens、ParseGeminiRateLimitResetTime 改为接收 []byte 使用 gjson 提取
- ParseGatewayRequest 的 model/stream/metadata/thinking/max_tokens 改用 gjson 类型安全提取
- Handler 层（sora/openai）改用 gjson 提取字段、sjson 注入/修改字段，移除 map[string]any 中间变量
- Sora Client 响应解析改用 gjson ForEach 遍历，减少内存分配
- 新增约 100 个单元测试用例，所有改动函数覆盖率 >85%

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

											
										
										
											2026-02-10 08:59:30 +08:00
 									// metadata.user_id: 直接路径提取，不需要严格类型校验
-												fix(gateway): 优化 ParseGatewayRequest 函数，使用 unsafe 提高性能并增加 JSON 校验

											
										
										
											2026-02-10 22:12:24 +08:00
+									parsed.MetadataUserID = gjson.Get(jsonStr, "metadata.user_id").String()
-												perf(backend): 使用 gjson/sjson 优化热路径 JSON 处理

将 API 网关热路径中的 json.Unmarshal+json.Marshal 替换为 gjson 零拷贝查询和 sjson 精准写入：
- unwrapV1InternalResponse 性能提升 22x（4009ns→182ns），内存分配减少 28.5x
- unwrapGeminiResponse、extractGeminiUsage、estimateGeminiCountTokens、ParseGeminiRateLimitResetTime 改为接收 []byte 使用 gjson 提取
- ParseGatewayRequest 的 model/stream/metadata/thinking/max_tokens 改用 gjson 类型安全提取
- Handler 层（sora/openai）改用 gjson 提取字段、sjson 注入/修改字段，移除 map[string]any 中间变量
- Sora Client 响应解析改用 gjson ForEach 遍历，减少内存分配
- 新增约 100 个单元测试用例，所有改动函数覆盖率 >85%

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

											
										
										
											2026-02-10 08:59:30 +08:00
-												Merge branch 'main' into test

											
										
										
											2026-02-12 23:43:47 +08:00
+									// thinking.type: enabled/adaptive 都视为开启
 									thinkingType := gjson.Get(jsonStr, "thinking.type").String()
 									if thinkingType == "enabled" || thinkingType == "adaptive" {
-												perf(backend): 使用 gjson/sjson 优化热路径 JSON 处理

将 API 网关热路径中的 json.Unmarshal+json.Marshal 替换为 gjson 零拷贝查询和 sjson 精准写入：
- unwrapV1InternalResponse 性能提升 22x（4009ns→182ns），内存分配减少 28.5x
- unwrapGeminiResponse、extractGeminiUsage、estimateGeminiCountTokens、ParseGeminiRateLimitResetTime 改为接收 []byte 使用 gjson 提取
- ParseGatewayRequest 的 model/stream/metadata/thinking/max_tokens 改用 gjson 类型安全提取
- Handler 层（sora/openai）改用 gjson 提取字段、sjson 注入/修改字段，移除 map[string]any 中间变量
- Sora Client 响应解析改用 gjson ForEach 遍历，减少内存分配
- 新增约 100 个单元测试用例，所有改动函数覆盖率 >85%

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

											
										
										
											2026-02-10 08:59:30 +08:00
+										parsed.ThinkingEnabled = true
 									}
 									// max_tokens: 仅接受整数值
-												fix(gateway): 优化 ParseGatewayRequest 函数，使用 unsafe 提高性能并增加 JSON 校验

											
										
										
											2026-02-10 22:12:24 +08:00
+									maxTokensResult := gjson.Get(jsonStr, "max_tokens")
-												perf(backend): 使用 gjson/sjson 优化热路径 JSON 处理

将 API 网关热路径中的 json.Unmarshal+json.Marshal 替换为 gjson 零拷贝查询和 sjson 精准写入：
- unwrapV1InternalResponse 性能提升 22x（4009ns→182ns），内存分配减少 28.5x
- unwrapGeminiResponse、extractGeminiUsage、estimateGeminiCountTokens、ParseGeminiRateLimitResetTime 改为接收 []byte 使用 gjson 提取
- ParseGatewayRequest 的 model/stream/metadata/thinking/max_tokens 改用 gjson 类型安全提取
- Handler 层（sora/openai）改用 gjson 提取字段、sjson 注入/修改字段，移除 map[string]any 中间变量
- Sora Client 响应解析改用 gjson ForEach 遍历，减少内存分配
- 新增约 100 个单元测试用例，所有改动函数覆盖率 >85%

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

											
										
										
											2026-02-10 08:59:30 +08:00
+									if maxTokensResult.Exists() && maxTokensResult.Type == gjson.Number {
 										f := maxTokensResult.Float()
 										if !math.IsNaN(f) && !math.IsInf(f, 0) && f == math.Trunc(f) &&
 											f <= float64(math.MaxInt) && f >= float64(math.MinInt) {
 											parsed.MaxTokens = int(f)
-												perf(后端): 完成性能优化与连接池配置

新增 DB/Redis 连接池配置与校验，并补充单测

网关请求体大小限制与 413 处理

HTTP/req 客户端池化并调整上游连接池默认值

并发槽位改为 ZSET+Lua 与指数退避

用量统计改 SQL 聚合并新增索引迁移

计费缓存写入改工作池并补测试/基准

测试: 在 backend/ 下运行 go test ./...

											
										
										
											2025-12-31 08:50:12 +08:00
+										}
 									}
-												fix: parse Gemini native request format in ParseGatewayRequest for correct session hash generation

ParseGatewayRequest only parsed Anthropic format (system/messages),
ignoring Gemini native format (systemInstruction/contents). This caused
GenerateSessionHash to produce identical hashes for all Gemini sessions.

Add protocol parameter to ParseGatewayRequest to branch between
Anthropic and Gemini parsing. Update GenerateSessionHash message
traversal to extract text from both formats.

											
										
										
											2026-02-09 06:47:22 +08:00
-												fix(gateway): 优化 ParseGatewayRequest 函数，使用 unsafe 提高性能并增加 JSON 校验

											
										
										
											2026-02-10 22:12:24 +08:00
+									// --- system/messages 提取 ---
 									// 避免把整个 body Unmarshal 到 map（会产生大量 map/接口分配）。
 									// 使用 gjson 抽取目标字段的 Raw，再对该子树进行 Unmarshal。
-												perf(backend): 使用 gjson/sjson 优化热路径 JSON 处理

将 API 网关热路径中的 json.Unmarshal+json.Marshal 替换为 gjson 零拷贝查询和 sjson 精准写入：
- unwrapV1InternalResponse 性能提升 22x（4009ns→182ns），内存分配减少 28.5x
- unwrapGeminiResponse、extractGeminiUsage、estimateGeminiCountTokens、ParseGeminiRateLimitResetTime 改为接收 []byte 使用 gjson 提取
- ParseGatewayRequest 的 model/stream/metadata/thinking/max_tokens 改用 gjson 类型安全提取
- Handler 层（sora/openai）改用 gjson 提取字段、sjson 注入/修改字段，移除 map[string]any 中间变量
- Sora Client 响应解析改用 gjson ForEach 遍历，减少内存分配
- 新增约 100 个单元测试用例，所有改动函数覆盖率 >85%

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

											
										
										
											2026-02-10 08:59:30 +08:00
-												fix: parse Gemini native request format in ParseGatewayRequest for correct session hash generation

ParseGatewayRequest only parsed Anthropic format (system/messages),
ignoring Gemini native format (systemInstruction/contents). This caused
GenerateSessionHash to produce identical hashes for all Gemini sessions.

Add protocol parameter to ParseGatewayRequest to branch between
Anthropic and Gemini parsing. Update GenerateSessionHash message
traversal to extract text from both formats.

											
										
										
											2026-02-09 06:47:22 +08:00
+									switch protocol {
 									case domain.PlatformGemini:
 										// Gemini 原生格式: systemInstruction.parts / contents
-												fix(gateway): 优化 ParseGatewayRequest 函数，使用 unsafe 提高性能并增加 JSON 校验

											
										
										
											2026-02-10 22:12:24 +08:00
+										if sysParts := gjson.Get(jsonStr, "systemInstruction.parts"); sysParts.Exists() && sysParts.IsArray() {
 											var parts []any
 											if err := json.Unmarshal(sliceRawFromBody(body, sysParts), &parts); err != nil {
 												return nil, err
-												fix: parse Gemini native request format in ParseGatewayRequest for correct session hash generation

ParseGatewayRequest only parsed Anthropic format (system/messages),
ignoring Gemini native format (systemInstruction/contents). This caused
GenerateSessionHash to produce identical hashes for all Gemini sessions.

Add protocol parameter to ParseGatewayRequest to branch between
Anthropic and Gemini parsing. Update GenerateSessionHash message
traversal to extract text from both formats.

											
										
										
											2026-02-09 06:47:22 +08:00
+											}
-												fix(gateway): 优化 ParseGatewayRequest 函数，使用 unsafe 提高性能并增加 JSON 校验

											
										
										
											2026-02-10 22:12:24 +08:00
+											parsed.System = parts
-												fix: parse Gemini native request format in ParseGatewayRequest for correct session hash generation

ParseGatewayRequest only parsed Anthropic format (system/messages),
ignoring Gemini native format (systemInstruction/contents). This caused
GenerateSessionHash to produce identical hashes for all Gemini sessions.

Add protocol parameter to ParseGatewayRequest to branch between
Anthropic and Gemini parsing. Update GenerateSessionHash message
traversal to extract text from both formats.

											
										
										
											2026-02-09 06:47:22 +08:00
+										}
-												fix(gateway): 优化 ParseGatewayRequest 函数，使用 unsafe 提高性能并增加 JSON 校验

											
										
										
											2026-02-10 22:12:24 +08:00
 										if contents := gjson.Get(jsonStr, "contents"); contents.Exists() && contents.IsArray() {
 											var msgs []any
 											if err := json.Unmarshal(sliceRawFromBody(body, contents), &msgs); err != nil {
 												return nil, err
 											}
 											parsed.Messages = msgs
-												fix: parse Gemini native request format in ParseGatewayRequest for correct session hash generation

ParseGatewayRequest only parsed Anthropic format (system/messages),
ignoring Gemini native format (systemInstruction/contents). This caused
GenerateSessionHash to produce identical hashes for all Gemini sessions.

Add protocol parameter to ParseGatewayRequest to branch between
Anthropic and Gemini parsing. Update GenerateSessionHash message
traversal to extract text from both formats.

											
										
										
											2026-02-09 06:47:22 +08:00
+										}
 									default:
 										// Anthropic / OpenAI 格式: system / messages
 										// system 字段只要存在就视为显式提供（即使为 null），
 										// 以避免客户端传 null 时被默认 system 误注入。
-												fix(gateway): 优化 ParseGatewayRequest 函数，使用 unsafe 提高性能并增加 JSON 校验

											
										
										
											2026-02-10 22:12:24 +08:00
+										if sys := gjson.Get(jsonStr, "system"); sys.Exists() {
-												fix: parse Gemini native request format in ParseGatewayRequest for correct session hash generation

ParseGatewayRequest only parsed Anthropic format (system/messages),
ignoring Gemini native format (systemInstruction/contents). This caused
GenerateSessionHash to produce identical hashes for all Gemini sessions.

Add protocol parameter to ParseGatewayRequest to branch between
Anthropic and Gemini parsing. Update GenerateSessionHash message
traversal to extract text from both formats.

											
										
										
											2026-02-09 06:47:22 +08:00
+											parsed.HasSystem = true
-												fix(gateway): 优化 ParseGatewayRequest 函数，使用 unsafe 提高性能并增加 JSON 校验

											
										
										
											2026-02-10 22:12:24 +08:00
+											switch sys.Type {
 											case gjson.Null:
 												parsed.System = nil
 											case gjson.String:
 												// 与 encoding/json 的 Unmarshal 行为一致：返回解码后的字符串。
 												parsed.System = sys.String()
 											default:
 												var system any
 												if err := json.Unmarshal(sliceRawFromBody(body, sys), &system); err != nil {
 													return nil, err
 												}
 												parsed.System = system
 											}
-												fix: parse Gemini native request format in ParseGatewayRequest for correct session hash generation

ParseGatewayRequest only parsed Anthropic format (system/messages),
ignoring Gemini native format (systemInstruction/contents). This caused
GenerateSessionHash to produce identical hashes for all Gemini sessions.

Add protocol parameter to ParseGatewayRequest to branch between
Anthropic and Gemini parsing. Update GenerateSessionHash message
traversal to extract text from both formats.

											
										
										
											2026-02-09 06:47:22 +08:00
+										}
-												fix(gateway): 优化 ParseGatewayRequest 函数，使用 unsafe 提高性能并增加 JSON 校验

											
										
										
											2026-02-10 22:12:24 +08:00
 										if msgs := gjson.Get(jsonStr, "messages"); msgs.Exists() && msgs.IsArray() {
 											var messages []any
 											if err := json.Unmarshal(sliceRawFromBody(body, msgs), &messages); err != nil {
 												return nil, err
 											}
-												fix: parse Gemini native request format in ParseGatewayRequest for correct session hash generation

ParseGatewayRequest only parsed Anthropic format (system/messages),
ignoring Gemini native format (systemInstruction/contents). This caused
GenerateSessionHash to produce identical hashes for all Gemini sessions.

Add protocol parameter to ParseGatewayRequest to branch between
Anthropic and Gemini parsing. Update GenerateSessionHash message
traversal to extract text from both formats.

											
										
										
											2026-02-09 06:47:22 +08:00
+											parsed.Messages = messages
 										}
-												perf(后端): 完成性能优化与连接池配置

新增 DB/Redis 连接池配置与校验，并补充单测

网关请求体大小限制与 413 处理

HTTP/req 客户端池化并调整上游连接池默认值

并发槽位改为 ZSET+Lua 与指数退避

用量统计改 SQL 聚合并新增索引迁移

计费缓存写入改工作池并补测试/基准

测试: 在 backend/ 下运行 go test ./...

											
										
										
											2025-12-31 08:50:12 +08:00
+									}
 									return parsed, nil
 								}
-												Fix/multiple issues (#24)

* fix(gemini): 修复 google_one OAuth 配置和 scopes 问题

- 修复 google_one 类型在 ExchangeCode 和 RefreshToken 中使用内置客户端
- 添加 DefaultGoogleOneScopes，包含 generative-language 和 drive.readonly 权限
- 在 EffectiveOAuthConfig 中为 google_one 类型使用专门的 scopes
- 将 docker-compose.override.yml 重命名为 .example 并添加到 .gitignore
- 完善 docker-compose.override.yml.example 示例文档

解决问题：
1. google_one OAuth 授权后 API 调用返回 403 权限不足
2. 缺少访问 Gemini API 所需的 generative-language scope
3. 缺少获取 Drive 存储配额所需的 drive.readonly scope

* fix(antigravity): 完全跳过 Claude 模型的所有 thinking 块

问题分析：
- 当前代码尝试保留有 signature 的 thinking 块
- 但 Vertex AI 的 signature 是完整性令牌，无法在本地验证
- 导致 400 错误：Invalid signature in thinking block

根本原因：
1. thinking 功能已对非 Gemini 模型禁用 (isThinkingEnabled=false)
2. Vertex AI 要求原样重放 (thinking, signature) 对或完全不发送
3. 本地无法复制 Vertex 的加密验证逻辑

修复方案：
- 对 Claude 模型完全跳过所有 thinking 块（无论是否有 signature）
- 保持 Gemini 模型使用 dummy signature 的行为不变
- 更新测试用例以反映新的预期行为

影响：
- 消除 thinking 相关的 400 错误
- 与现有的 thinking 禁用策略保持一致
- 不影响 Gemini 模型的 thinking 功能

测试：
- ✅ TestBuildParts_ThinkingBlockWithoutSignature 全部通过
- ✅ TestBuildTools_CustomTypeTools 全部通过

参考：Codex review 建议

* fix(gateway): 修复 count_tokens 端点 400 错误

问题分析：
- count_tokens 请求包含 thinking 块时返回 400 错误
- 原因：thinking 块未被过滤，直接转发到上游 API
- 上游 API 拒绝无效的 thinking signature

根本原因：
1. /v1/messages 请求通过 TransformClaudeToGemini 过滤 thinking 块
2. count_tokens 请求绕过转换，直接转发原始请求体
3. 导致包含无效 signature 的 thinking 块被发送到上游

修复方案：
- 创建 FilterThinkingBlocks 工具函数
- 在 buildCountTokensRequest 中应用过滤（1 行修改）
- 与 /v1/messages 行为保持一致

实现细节：
- FilterThinkingBlocks: 解析 JSON，过滤 thinking 块，重新序列化
- 失败安全：解析/序列化失败时返回原始请求体
- 性能优化：仅在发现 thinking 块时重新序列化

测试：
- ✅ 6 个单元测试全部通过
- ✅ 覆盖正常过滤、无 thinking 块、无效 JSON 等场景
- ✅ 现有测试不受影响

影响：
- 消除 count_tokens 的 400 错误
- 不影响 Antigravity 账号（仍返回模拟响应）
- 适用于所有账号类型（OAuth、API Key）

文件修改：
- backend/internal/service/gateway_request.go: +62 行（新函数）
- backend/internal/service/gateway_service.go: +2 行（应用过滤）
- backend/internal/service/gateway_request_test.go: +62 行（测试）

* fix(gateway): 增强 thinking 块过滤逻辑

基于 Codex 分析和建议的改进：

问题分析：
- 新错误：signature: Field required（signature 字段缺失）
- 旧错误：Invalid signature（signature 存在但无效）
- 两者都说明 thinking 块在请求中是危险的

Codex 建议：
- 保持 Option A：完全跳过所有 thinking 块
- 原因：thinking 块应该是只输出的，除非有服务端来源证明
- 在无状态代理中，无法安全区分上游来源 vs 客户端注入

改进内容：

1. 增强 FilterThinkingBlocks 函数
   - 过滤显式的 thinking 块：{"type":"thinking", ...}
   - 过滤无 type 的 thinking 对象：{"thinking": {...}}
   - 保留 tool_use 等其他类型块中的 thinking 字段
   - 修复：只在实际过滤时更新 content 数组

2. 扩展过滤范围
   - 将 FilterThinkingBlocks 应用到 /v1/messages 主路径
   - 之前只应用于 count_tokens，现在两个端点都过滤
   - 防止所有端点的 thinking 相关 400 错误

3. 改进测试
   - 新增：过滤无 type discriminator 的 thinking 块
   - 新增：不过滤 tool_use 中的 thinking 字段
   - 使用 containsThinkingBlock 辅助函数验证

测试：
- ✅ 8 个测试用例全部通过
- ✅ 覆盖各种 thinking 块格式
- ✅ 确保不误伤其他类型的块

影响：
- 消除 signature required 和 invalid signature 错误
- 统一 /v1/messages 和 count_tokens 的行为
- 更健壮的 thinking 块检测逻辑

参考：Codex review 和代码改进

* refactor: 根据 Codex 审查建议进行代码优化

基于 Codex 代码审查的 P1 和 P2 改进：

P1 改进（重要问题）：

1. 优化日志输出
   - 移除 thinking 块跳过时的 log.Printf
   - 避免高频请求下的日志噪音
   - 添加注释说明可通过指标监控

2. 清理遗留代码
   - 删除未使用的 isValidThoughtSignature 函数（27行）
   - 该函数在改为完全跳过 thinking 块后不再需要

P2 改进（性能优化）：

3. 添加快速路径检查
   - 在 FilterThinkingBlocks 中添加 bytes.Contains 预检查
   - 如果请求体不包含 "thinking" 字符串，直接返回
   - 避免不必要的 JSON 解析，提升性能

技术细节：
- request_transformer.go: -27行（删除函数），+1行（优化注释）
- gateway_request.go: +5行（快速路径 + bytes 导入）

测试：
- ✅ TestBuildParts_ThinkingBlockWithoutSignature 全部通过
- ✅ TestFilterThinkingBlocks 全部通过（8个测试用例）

影响：
- 减少日志噪音
- 提升性能（快速路径）
- 代码更简洁（删除未使用代码）

参考：Codex 代码审查建议

* fix: 修复 golangci-lint 检查问题

- 格式化 gateway_request_test.go
- 使用 switch 语句替代 if-else 链（staticcheck QF1003）

* fix(antigravity): 修复 thinking signature 处理并实现 Auto 模式降级

问题分析：
1. 原先代码错误地禁用了 Claude via Vertex 的 thinkingConfig
2. 历史 thinking 块的 signature 被完全跳过，导致验证失败
3. 跨模型混用时 dummy signature 会导致 400 错误

修复内容：

**request_transformer.go**：
- 删除第 38-43 行的错误逻辑（禁用 thinkingConfig）
- 引入 thoughtSignatureMode（Preserve/Dummy）策略
- Claude 模式：透传真实 signature，过滤空/dummy
- Gemini 模式：使用 dummy signature
- 支持 signature-only thinking 块
- tool_use 的 signature 也透传

**antigravity_gateway_service.go**：
- 新增 isSignatureRelatedError() 检测 signature 相关错误
- 新增 stripThinkingFromClaudeRequest() 移除 thinking 块
- 实现 Auto 模式：检测 400 + signature 关键词时自动降级重试
- 重试时完全移除 thinking 配置和消息中的 thinking 块
- 最多重试一次，避免循环

**测试**：
- 更新并新增测试覆盖 Claude preserve/Gemini dummy 模式
- 新增 tool_use signature 处理测试
- 所有测试通过（6/6）

影响：
- ✅ Claude via Vertex 可以正常使用 thinking 功能
- ✅ 历史 signature 正确透传，避免验证失败
- ✅ 跨模型混用时自动过滤无效 signature
- ✅ 错误驱动降级，自动修复 signature 问题
- ✅ 不影响纯 Claude API 和其他渠道

参考：Codex 深度分析和实现建议

* fix(lint): 修复 gofmt 格式问题

* fix(antigravity): 修复 stripThinkingFromClaudeRequest 遗漏 untyped thinking blocks

问题：
- Codex 审查指出 stripThinkingFromClaudeRequest 只移除了 type="thinking" 的块
- 没有处理没有 type 字段的 thinking 对象（如 {"thinking": "...", "signature": "..."}）
- 导致重试时仍包含无效 thinking 块，上游 400 错误持续

修复：
- 添加检查：跳过没有 type 但有 thinking 字段的块
- 现在会移除两种格式：
  1. {"type": "thinking", "thinking": "...", "signature": "..."}
  2. {"thinking": "...", "signature": "..."}（untyped）

测试：所有测试通过

参考：Codex P1 审查意见
											
										
										
											2026-01-02 17:47:49 +08:00
-												fix(gateway): 优化 ParseGatewayRequest 函数，使用 unsafe 提高性能并增加 JSON 校验

											
										
										
											2026-02-10 22:12:24 +08:00
+								// sliceRawFromBody returns the bytes of r.Raw, preferring a zero-copy view into body.
 								// When r.Index is positive and the span starting there with length len(r.Raw) fits
 								// inside body, the returned slice aliases body, so large fields (such as messages)
 								// are not copied. Otherwise it returns a fresh copy of r.Raw.
 								func sliceRawFromBody(body []byte, r gjson.Result) []byte {
 									start := r.Index
 									stop := start + len(r.Raw)
 									if start <= 0 || stop > len(body) {
 										// No usable index, or the span overruns body: copying keeps the result correct.
 										return []byte(r.Raw)
 									}
 									return body[start:stop]
 								}
-												Fix/multiple issues (#24)

* fix(gemini): 修复 google_one OAuth 配置和 scopes 问题

- 修复 google_one 类型在 ExchangeCode 和 RefreshToken 中使用内置客户端
- 添加 DefaultGoogleOneScopes，包含 generative-language 和 drive.readonly 权限
- 在 EffectiveOAuthConfig 中为 google_one 类型使用专门的 scopes
- 将 docker-compose.override.yml 重命名为 .example 并添加到 .gitignore
- 完善 docker-compose.override.yml.example 示例文档

解决问题：
1. google_one OAuth 授权后 API 调用返回 403 权限不足
2. 缺少访问 Gemini API 所需的 generative-language scope
3. 缺少获取 Drive 存储配额所需的 drive.readonly scope

* fix(antigravity): 完全跳过 Claude 模型的所有 thinking 块

问题分析：
- 当前代码尝试保留有 signature 的 thinking 块
- 但 Vertex AI 的 signature 是完整性令牌，无法在本地验证
- 导致 400 错误：Invalid signature in thinking block

根本原因：
1. thinking 功能已对非 Gemini 模型禁用 (isThinkingEnabled=false)
2. Vertex AI 要求原样重放 (thinking, signature) 对或完全不发送
3. 本地无法复制 Vertex 的加密验证逻辑

修复方案：
- 对 Claude 模型完全跳过所有 thinking 块（无论是否有 signature）
- 保持 Gemini 模型使用 dummy signature 的行为不变
- 更新测试用例以反映新的预期行为

影响：
- 消除 thinking 相关的 400 错误
- 与现有的 thinking 禁用策略保持一致
- 不影响 Gemini 模型的 thinking 功能

测试：
- ✅ TestBuildParts_ThinkingBlockWithoutSignature 全部通过
- ✅ TestBuildTools_CustomTypeTools 全部通过

参考：Codex review 建议

* fix(gateway): 修复 count_tokens 端点 400 错误

问题分析：
- count_tokens 请求包含 thinking 块时返回 400 错误
- 原因：thinking 块未被过滤，直接转发到上游 API
- 上游 API 拒绝无效的 thinking signature

根本原因：
1. /v1/messages 请求通过 TransformClaudeToGemini 过滤 thinking 块
2. count_tokens 请求绕过转换，直接转发原始请求体
3. 导致包含无效 signature 的 thinking 块被发送到上游

修复方案：
- 创建 FilterThinkingBlocks 工具函数
- 在 buildCountTokensRequest 中应用过滤（1 行修改）
- 与 /v1/messages 行为保持一致

实现细节：
- FilterThinkingBlocks: 解析 JSON，过滤 thinking 块，重新序列化
- 失败安全：解析/序列化失败时返回原始请求体
- 性能优化：仅在发现 thinking 块时重新序列化

测试：
- ✅ 6 个单元测试全部通过
- ✅ 覆盖正常过滤、无 thinking 块、无效 JSON 等场景
- ✅ 现有测试不受影响

影响：
- 消除 count_tokens 的 400 错误
- 不影响 Antigravity 账号（仍返回模拟响应）
- 适用于所有账号类型（OAuth、API Key）

文件修改：
- backend/internal/service/gateway_request.go: +62 行（新函数）
- backend/internal/service/gateway_service.go: +2 行（应用过滤）
- backend/internal/service/gateway_request_test.go: +62 行（测试）

* fix(gateway): 增强 thinking 块过滤逻辑

基于 Codex 分析和建议的改进：

问题分析：
- 新错误：signature: Field required（signature 字段缺失）
- 旧错误：Invalid signature（signature 存在但无效）
- 两者都说明 thinking 块在请求中是危险的

Codex 建议：
- 保持 Option A：完全跳过所有 thinking 块
- 原因：thinking 块应该是只输出的，除非有服务端来源证明
- 在无状态代理中，无法安全区分上游来源 vs 客户端注入

改进内容：

1. 增强 FilterThinkingBlocks 函数
   - 过滤显式的 thinking 块：{"type":"thinking", ...}
   - 过滤无 type 的 thinking 对象：{"thinking": {...}}
   - 保留 tool_use 等其他类型块中的 thinking 字段
   - 修复：只在实际过滤时更新 content 数组

2. 扩展过滤范围
   - 将 FilterThinkingBlocks 应用到 /v1/messages 主路径
   - 之前只应用于 count_tokens，现在两个端点都过滤
   - 防止所有端点的 thinking 相关 400 错误

3. 改进测试
   - 新增：过滤无 type discriminator 的 thinking 块
   - 新增：不过滤 tool_use 中的 thinking 字段
   - 使用 containsThinkingBlock 辅助函数验证

测试：
- ✅ 8 个测试用例全部通过
- ✅ 覆盖各种 thinking 块格式
- ✅ 确保不误伤其他类型的块

影响：
- 消除 signature required 和 invalid signature 错误
- 统一 /v1/messages 和 count_tokens 的行为
- 更健壮的 thinking 块检测逻辑

参考：Codex review 和代码改进

* refactor: 根据 Codex 审查建议进行代码优化

基于 Codex 代码审查的 P1 和 P2 改进：

P1 改进（重要问题）：

1. 优化日志输出
   - 移除 thinking 块跳过时的 log.Printf
   - 避免高频请求下的日志噪音
   - 添加注释说明可通过指标监控

2. 清理遗留代码
   - 删除未使用的 isValidThoughtSignature 函数（27行）
   - 该函数在改为完全跳过 thinking 块后不再需要

P2 改进（性能优化）：

3. 添加快速路径检查
   - 在 FilterThinkingBlocks 中添加 bytes.Contains 预检查
   - 如果请求体不包含 "thinking" 字符串，直接返回
   - 避免不必要的 JSON 解析，提升性能

技术细节：
- request_transformer.go: -27行（删除函数），+1行（优化注释）
- gateway_request.go: +5行（快速路径 + bytes 导入）

测试：
- ✅ TestBuildParts_ThinkingBlockWithoutSignature 全部通过
- ✅ TestFilterThinkingBlocks 全部通过（8个测试用例）

影响：
- 减少日志噪音
- 提升性能（快速路径）
- 代码更简洁（删除未使用代码）

参考：Codex 代码审查建议

* fix: 修复 golangci-lint 检查问题

- 格式化 gateway_request_test.go
- 使用 switch 语句替代 if-else 链（staticcheck QF1003）

* fix(antigravity): 修复 thinking signature 处理并实现 Auto 模式降级

问题分析：
1. 原先代码错误地禁用了 Claude via Vertex 的 thinkingConfig
2. 历史 thinking 块的 signature 被完全跳过，导致验证失败
3. 跨模型混用时 dummy signature 会导致 400 错误

修复内容：

**request_transformer.go**：
- 删除第 38-43 行的错误逻辑（禁用 thinkingConfig）
- 引入 thoughtSignatureMode（Preserve/Dummy）策略
- Claude 模式：透传真实 signature，过滤空/dummy
- Gemini 模式：使用 dummy signature
- 支持 signature-only thinking 块
- tool_use 的 signature 也透传

**antigravity_gateway_service.go**：
- 新增 isSignatureRelatedError() 检测 signature 相关错误
- 新增 stripThinkingFromClaudeRequest() 移除 thinking 块
- 实现 Auto 模式：检测 400 + signature 关键词时自动降级重试
- 重试时完全移除 thinking 配置和消息中的 thinking 块
- 最多重试一次，避免循环

**测试**：
- 更新并新增测试覆盖 Claude preserve/Gemini dummy 模式
- 新增 tool_use signature 处理测试
- 所有测试通过（6/6）

影响：
- ✅ Claude via Vertex 可以正常使用 thinking 功能
- ✅ 历史 signature 正确透传，避免验证失败
- ✅ 跨模型混用时自动过滤无效 signature
- ✅ 错误驱动降级，自动修复 signature 问题
- ✅ 不影响纯 Claude API 和其他渠道

参考：Codex 深度分析和实现建议

* fix(lint): 修复 gofmt 格式问题

* fix(antigravity): 修复 stripThinkingFromClaudeRequest 遗漏 untyped thinking blocks

问题：
- Codex 审查指出 stripThinkingFromClaudeRequest 只移除了 type="thinking" 的块
- 没有处理没有 type 字段的 thinking 对象（如 {"thinking": "...", "signature": "..."}）
- 导致重试时仍包含无效 thinking 块，上游 400 错误持续

修复：
- 添加检查：跳过没有 type 但有 thinking 字段的块
- 现在会移除两种格式：
  1. {"type": "thinking", "thinking": "...", "signature": "..."}
  2. {"thinking": "...", "signature": "..."}（untyped）

测试：所有测试通过

参考：Codex P1 审查意见
											
										
										
											2026-01-02 17:47:49 +08:00
+								// FilterThinkingBlocks removes thinking blocks from request body
 								// Returns filtered body or original body if filtering fails (fail-safe)
 								// This prevents 400 errors from invalid thinking block signatures
-												perf(gateway): 优化负载感知调度

主要改进：
- 优化负载感知调度的准确性和响应速度
- 将 AccountUsageService 的包级缓存改为依赖注入
- 修复 SSE/JSON 转义和 nil 安全问题
- 恢复 Google One 功能兼容性

											
										
										
											2026-01-03 06:32:51 -08:00
+								//
-												[UPDATE] 增强 Claude Thinking 模式支持与 Opus 4.6 动态预算适配

✨ feat(antigravity): 支持 thinking adaptive 类型并适配 Opus 4.6 动态预算
🧪 test(gateway): 增加 thinking 模式解析与签名块过滤的边界用例测试

											
										
										
											2026-02-11 10:31:16 +08:00
+								// Strategy:
 								//   - When thinking.type is not "enabled"/"adaptive": remove all thinking-related blocks.
 								//   - When thinking.type is "enabled"/"adaptive": remove only thinking blocks whose signature is missing or invalid (to avoid 400 errors)
-												perf(gateway): 优化负载感知调度

主要改进：
- 优化负载感知调度的准确性和响应速度
- 将 AccountUsageService 的包级缓存改为依赖注入
- 修复 SSE/JSON 转义和 nil 安全问题
- 恢复 Google One 功能兼容性

											
										
										
											2026-01-03 06:32:51 -08:00
+								//     (blocks with missing/empty/dummy signatures that would cause 400 errors)
-												Fix/multiple issues (#24)

* fix(gemini): 修复 google_one OAuth 配置和 scopes 问题

- 修复 google_one 类型在 ExchangeCode 和 RefreshToken 中使用内置客户端
- 添加 DefaultGoogleOneScopes，包含 generative-language 和 drive.readonly 权限
- 在 EffectiveOAuthConfig 中为 google_one 类型使用专门的 scopes
- 将 docker-compose.override.yml 重命名为 .example 并添加到 .gitignore
- 完善 docker-compose.override.yml.example 示例文档

解决问题：
1. google_one OAuth 授权后 API 调用返回 403 权限不足
2. 缺少访问 Gemini API 所需的 generative-language scope
3. 缺少获取 Drive 存储配额所需的 drive.readonly scope

* fix(antigravity): 完全跳过 Claude 模型的所有 thinking 块

问题分析：
- 当前代码尝试保留有 signature 的 thinking 块
- 但 Vertex AI 的 signature 是完整性令牌，无法在本地验证
- 导致 400 错误：Invalid signature in thinking block

根本原因：
1. thinking 功能已对非 Gemini 模型禁用 (isThinkingEnabled=false)
2. Vertex AI 要求原样重放 (thinking, signature) 对或完全不发送
3. 本地无法复制 Vertex 的加密验证逻辑

修复方案：
- 对 Claude 模型完全跳过所有 thinking 块（无论是否有 signature）
- 保持 Gemini 模型使用 dummy signature 的行为不变
- 更新测试用例以反映新的预期行为

影响：
- 消除 thinking 相关的 400 错误
- 与现有的 thinking 禁用策略保持一致
- 不影响 Gemini 模型的 thinking 功能

测试：
- ✅ TestBuildParts_ThinkingBlockWithoutSignature 全部通过
- ✅ TestBuildTools_CustomTypeTools 全部通过

参考：Codex review 建议

* fix(gateway): 修复 count_tokens 端点 400 错误

问题分析：
- count_tokens 请求包含 thinking 块时返回 400 错误
- 原因：thinking 块未被过滤，直接转发到上游 API
- 上游 API 拒绝无效的 thinking signature

根本原因：
1. /v1/messages 请求通过 TransformClaudeToGemini 过滤 thinking 块
2. count_tokens 请求绕过转换，直接转发原始请求体
3. 导致包含无效 signature 的 thinking 块被发送到上游

修复方案：
- 创建 FilterThinkingBlocks 工具函数
- 在 buildCountTokensRequest 中应用过滤（1 行修改）
- 与 /v1/messages 行为保持一致

实现细节：
- FilterThinkingBlocks: 解析 JSON，过滤 thinking 块，重新序列化
- 失败安全：解析/序列化失败时返回原始请求体
- 性能优化：仅在发现 thinking 块时重新序列化

测试：
- ✅ 6 个单元测试全部通过
- ✅ 覆盖正常过滤、无 thinking 块、无效 JSON 等场景
- ✅ 现有测试不受影响

影响：
- 消除 count_tokens 的 400 错误
- 不影响 Antigravity 账号（仍返回模拟响应）
- 适用于所有账号类型（OAuth、API Key）

文件修改：
- backend/internal/service/gateway_request.go: +62 行（新函数）
- backend/internal/service/gateway_service.go: +2 行（应用过滤）
- backend/internal/service/gateway_request_test.go: +62 行（测试）

* fix(gateway): 增强 thinking 块过滤逻辑

基于 Codex 分析和建议的改进：

问题分析：
- 新错误：signature: Field required（signature 字段缺失）
- 旧错误：Invalid signature（signature 存在但无效）
- 两者都说明 thinking 块在请求中是危险的

Codex 建议：
- 保持 Option A：完全跳过所有 thinking 块
- 原因：thinking 块应该是只输出的，除非有服务端来源证明
- 在无状态代理中，无法安全区分上游来源 vs 客户端注入

改进内容：

1. 增强 FilterThinkingBlocks 函数
   - 过滤显式的 thinking 块：{"type":"thinking", ...}
   - 过滤无 type 的 thinking 对象：{"thinking": {...}}
   - 保留 tool_use 等其他类型块中的 thinking 字段
   - 修复：只在实际过滤时更新 content 数组

2. 扩展过滤范围
   - 将 FilterThinkingBlocks 应用到 /v1/messages 主路径
   - 之前只应用于 count_tokens，现在两个端点都过滤
   - 防止所有端点的 thinking 相关 400 错误

3. 改进测试
   - 新增：过滤无 type discriminator 的 thinking 块
   - 新增：不过滤 tool_use 中的 thinking 字段
   - 使用 containsThinkingBlock 辅助函数验证

测试：
- ✅ 8 个测试用例全部通过
- ✅ 覆盖各种 thinking 块格式
- ✅ 确保不误伤其他类型的块

影响：
- 消除 signature required 和 invalid signature 错误
- 统一 /v1/messages 和 count_tokens 的行为
- 更健壮的 thinking 块检测逻辑

参考：Codex review 和代码改进

* refactor: 根据 Codex 审查建议进行代码优化

基于 Codex 代码审查的 P1 和 P2 改进：

P1 改进（重要问题）：

1. 优化日志输出
   - 移除 thinking 块跳过时的 log.Printf
   - 避免高频请求下的日志噪音
   - 添加注释说明可通过指标监控

2. 清理遗留代码
   - 删除未使用的 isValidThoughtSignature 函数（27行）
   - 该函数在改为完全跳过 thinking 块后不再需要

P2 改进（性能优化）：

3. 添加快速路径检查
   - 在 FilterThinkingBlocks 中添加 bytes.Contains 预检查
   - 如果请求体不包含 "thinking" 字符串，直接返回
   - 避免不必要的 JSON 解析，提升性能

技术细节：
- request_transformer.go: -27行（删除函数），+1行（优化注释）
- gateway_request.go: +5行（快速路径 + bytes 导入）

测试：
- ✅ TestBuildParts_ThinkingBlockWithoutSignature 全部通过
- ✅ TestFilterThinkingBlocks 全部通过（8个测试用例）

影响：
- 减少日志噪音
- 提升性能（快速路径）
- 代码更简洁（删除未使用代码）

参考：Codex 代码审查建议

* fix: 修复 golangci-lint 检查问题

- 格式化 gateway_request_test.go
- 使用 switch 语句替代 if-else 链（staticcheck QF1003）

* fix(antigravity): 修复 thinking signature 处理并实现 Auto 模式降级

问题分析：
1. 原先代码错误地禁用了 Claude via Vertex 的 thinkingConfig
2. 历史 thinking 块的 signature 被完全跳过，导致验证失败
3. 跨模型混用时 dummy signature 会导致 400 错误

修复内容：

**request_transformer.go**：
- 删除第 38-43 行的错误逻辑（禁用 thinkingConfig）
- 引入 thoughtSignatureMode（Preserve/Dummy）策略
- Claude 模式：透传真实 signature，过滤空/dummy
- Gemini 模式：使用 dummy signature
- 支持 signature-only thinking 块
- tool_use 的 signature 也透传

**antigravity_gateway_service.go**：
- 新增 isSignatureRelatedError() 检测 signature 相关错误
- 新增 stripThinkingFromClaudeRequest() 移除 thinking 块
- 实现 Auto 模式：检测 400 + signature 关键词时自动降级重试
- 重试时完全移除 thinking 配置和消息中的 thinking 块
- 最多重试一次，避免循环

**测试**：
- 更新并新增测试覆盖 Claude preserve/Gemini dummy 模式
- 新增 tool_use signature 处理测试
- 所有测试通过（6/6）

影响：
- ✅ Claude via Vertex 可以正常使用 thinking 功能
- ✅ 历史 signature 正确透传，避免验证失败
- ✅ 跨模型混用时自动过滤无效 signature
- ✅ 错误驱动降级，自动修复 signature 问题
- ✅ 不影响纯 Claude API 和其他渠道

参考：Codex 深度分析和实现建议

* fix(lint): 修复 gofmt 格式问题

* fix(antigravity): 修复 stripThinkingFromClaudeRequest 遗漏 untyped thinking blocks

问题：
- Codex 审查指出 stripThinkingFromClaudeRequest 只移除了 type="thinking" 的块
- 没有处理没有 type 字段的 thinking 对象（如 {"thinking": "...", "signature": "..."}）
- 导致重试时仍包含无效 thinking 块，上游 400 错误持续

修复：
- 添加检查：跳过没有 type 但有 thinking 字段的块
- 现在会移除两种格式：
  1. {"type": "thinking", "thinking": "...", "signature": "..."}
  2. {"thinking": "...", "signature": "..."}（untyped）

测试：所有测试通过

参考：Codex P1 审查意见
											
										
										
											2026-01-02 17:47:49 +08:00
+								func FilterThinkingBlocks(body []byte) []byte {
-												fix(thinking): 优化 thinking block 签名错误重试逻辑

- FilterThinkingBlocksForRetry: 将 thinking block 转换为 text block 而非直接删除
- stripThinkingFromClaudeRequest: Antigravity 网关同步采用转换策略
- 统一处理 thinking/redacted_thinking/无 type 字段的 thinking block
- 保留 thinking 内容，避免上下文丢失

											
										
										
											2026-01-03 17:07:54 -08:00
 									// Delegates to filterThinkingBlocksInternal, passing false as the second argument.
 									// NOTE(review): the flag's meaning is defined by that helper, which is not visible
 									// here; presumably false selects the normal (non-retry) filtering mode. Confirm.
+									return filterThinkingBlocksInternal(body, false)
 								}
-												fix(backend): 改进 thinking/tool block 签名处理和重试策略

主要改动：
- request_transformer: thinking block 缺少签名时降级为文本而非丢弃，保留内容并在上层禁用 thinking mode
- antigravity_gateway_service: 新增两阶段降级策略，先处理 thinking blocks，如仍失败且涉及 tool 签名错误则进一步降级 tool blocks
- gateway_request: 新增 FilterSignatureSensitiveBlocksForRetry 函数，支持将 tool_use/tool_result 降级为文本
- gateway_request: 改进 FilterThinkingBlocksForRetry，禁用顶层 thinking 配置以避免结构约束冲突
- gateway_service: 实现保守的两阶段重试逻辑，优先保留内容，仅在必要时降级工具调用
- 新增 antigravity_gateway_service_test.go 测试签名块剥离逻辑
- 更新相关测试用例以验证降级行为

此修复解决了跨平台/账户切换时历史消息签名失效导致的请求失败问题。

											
										
										
											2026-01-04 22:32:36 +08:00
+								// FilterThinkingBlocksForRetry strips thinking-related constructs for retry scenarios.
-												fix(gateway): 优化 thinking block 重试逻辑

- 保留用户的 thinking.type=enabled 设置（不再禁用）
- 只移除历史消息中的 thinking/redacted_thinking blocks
- 处理过滤后空消息：跳过 assistant 消息，user 消息添加占位符
- 增强错误检测：覆盖 signature、Expected thinking、empty content 错误
- 添加重试成功/失败日志便于排查

											
										
										
											2026-01-03 18:05:15 -08:00
+								//
-												fix(backend): 改进 thinking/tool block 签名处理和重试策略

主要改动：
- request_transformer: thinking block 缺少签名时降级为文本而非丢弃，保留内容并在上层禁用 thinking mode
- antigravity_gateway_service: 新增两阶段降级策略，先处理 thinking blocks，如仍失败且涉及 tool 签名错误则进一步降级 tool blocks
- gateway_request: 新增 FilterSignatureSensitiveBlocksForRetry 函数，支持将 tool_use/tool_result 降级为文本
- gateway_request: 改进 FilterThinkingBlocksForRetry，禁用顶层 thinking 配置以避免结构约束冲突
- gateway_service: 实现保守的两阶段重试逻辑，优先保留内容，仅在必要时降级工具调用
- 新增 antigravity_gateway_service_test.go 测试签名块剥离逻辑
- 更新相关测试用例以验证降级行为

此修复解决了跨平台/账户切换时历史消息签名失效导致的请求失败问题。

											
										
										
											2026-01-04 22:32:36 +08:00
+								// Why:
 								//   - Upstreams may reject historical `thinking`/`redacted_thinking` blocks due to invalid/missing signatures.
 								//   - Anthropic extended thinking has a structural constraint: when top-level `thinking` is enabled and the
 								//     final message is an assistant prefill, the assistant content must start with a thinking block.
 								//   - If we remove thinking blocks but keep top-level `thinking` enabled, we can trigger:
-												fix(backend): 修复 CI 失败问题

修复内容：
1. 修复 6 个 golangci-lint 错误
   - 3 个 errcheck 错误：在 gateway_request_test.go 中添加类型断言检查
   - 3 个 gofmt 格式化问题：修复代码格式
2. 修复 API 契约测试失败
   - 在测试中添加缺失的字段：enable_identity_patch 和 identity_patch_prompt

所有测试和 linter 检查现已通过。

											
										
										
											2026-01-05 00:56:48 +08:00
+								//     "Expected `thinking` or `redacted_thinking`, but found `text`"
-												fix(gateway): 优化 thinking block 重试逻辑

- 保留用户的 thinking.type=enabled 设置（不再禁用）
- 只移除历史消息中的 thinking/redacted_thinking blocks
- 处理过滤后空消息：跳过 assistant 消息，user 消息添加占位符
- 增强错误检测：覆盖 signature、Expected thinking、empty content 错误
- 添加重试成功/失败日志便于排查

											
										
										
											2026-01-03 18:05:15 -08:00
+								//
-												fix(backend): 改进 thinking/tool block 签名处理和重试策略

主要改动：
- request_transformer: thinking block 缺少签名时降级为文本而非丢弃，保留内容并在上层禁用 thinking mode
- antigravity_gateway_service: 新增两阶段降级策略，先处理 thinking blocks，如仍失败且涉及 tool 签名错误则进一步降级 tool blocks
- gateway_request: 新增 FilterSignatureSensitiveBlocksForRetry 函数，支持将 tool_use/tool_result 降级为文本
- gateway_request: 改进 FilterThinkingBlocksForRetry，禁用顶层 thinking 配置以避免结构约束冲突
- gateway_service: 实现保守的两阶段重试逻辑，优先保留内容，仅在必要时降级工具调用
- 新增 antigravity_gateway_service_test.go 测试签名块剥离逻辑
- 更新相关测试用例以验证降级行为

此修复解决了跨平台/账户切换时历史消息签名失效导致的请求失败问题。

											
										
										
											2026-01-04 22:32:36 +08:00
+								// Strategy (B: preserve content as text):
 								//   - Disable top-level `thinking` (remove `thinking` field).
 								//   - Convert `thinking` blocks to `text` blocks (preserve the thinking content).
 								//   - Remove `redacted_thinking` blocks (cannot be converted to text).
 								//   - Ensure no message ends up with empty content.
-												fix(thinking): 优化 thinking block 签名错误重试逻辑

- FilterThinkingBlocksForRetry: 将 thinking block 转换为 text block 而非直接删除
- stripThinkingFromClaudeRequest: Antigravity 网关同步采用转换策略
- 统一处理 thinking/redacted_thinking/无 type 字段的 thinking block
- 保留 thinking 内容，避免上下文丢失

											
										
										
											2026-01-03 17:07:54 -08:00
+								func FilterThinkingBlocksForRetry(body []byte) []byte {
-												perf(service): 优化重试场景 thinking 过滤性能

- 避免全量 Unmarshal 请求体，改为仅解析 messages 子树

- 顶层 thinking 使用 sjson 直接删除，减少整体重写

- content 仅在需要修改时延迟分配 new slice

- 增加 FilterThinkingBlocksForRetry 基准测试

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

											
										
										
											2026-02-11 00:47:26 +08:00
+									hasThinkingContent := bytes.Contains(body, patternTypeThinking) ||
 										bytes.Contains(body, patternTypeThinkingSpaced) ||
 										bytes.Contains(body, patternTypeRedactedThinking) ||
 										bytes.Contains(body, patternTypeRedactedSpaced) ||
 										bytes.Contains(body, patternThinkingField) ||
 										bytes.Contains(body, patternThinkingFieldSpaced)
-												fix: 修复空content处理及更新Gemini使用指南链接

- 修复FilterThinkingBlocksForRetry对空content数组的处理
- docker-compose添加SECURITY_URL_ALLOWLIST_UPSTREAM_HOSTS配置
- 更新Gemini使用指南链接：检查归属地、修改归属地、激活Gemini Web

											
										
										
											2026-01-04 18:26:39 -08:00
 									// Also check for empty content arrays that need fixing.
 									// Note: This is a heuristic check; the actual empty content handling is done below.
-												perf(service): 优化重试场景 thinking 过滤性能

- 避免全量 Unmarshal 请求体，改为仅解析 messages 子树

- 顶层 thinking 使用 sjson 直接删除，减少整体重写

- content 仅在需要修改时延迟分配 new slice

- 增加 FilterThinkingBlocksForRetry 基准测试

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

											
										
										
											2026-02-11 00:47:26 +08:00
+									hasEmptyContent := bytes.Contains(body, patternEmptyContent) ||
 										bytes.Contains(body, patternEmptyContentSpaced) ||
 										bytes.Contains(body, patternEmptyContentSp1) ||
 										bytes.Contains(body, patternEmptyContentSp2)
-												fix: 修复空content处理及更新Gemini使用指南链接

- 修复FilterThinkingBlocksForRetry对空content数组的处理
- docker-compose添加SECURITY_URL_ALLOWLIST_UPSTREAM_HOSTS配置
- 更新Gemini使用指南链接：检查归属地、修改归属地、激活Gemini Web

											
										
										
											2026-01-04 18:26:39 -08:00
 									// Fast path: nothing to process
 									if !hasThinkingContent && !hasEmptyContent {
-												fix(thinking): 优化 thinking block 签名错误重试逻辑

- FilterThinkingBlocksForRetry: 将 thinking block 转换为 text block 而非直接删除
- stripThinkingFromClaudeRequest: Antigravity 网关同步采用转换策略
- 统一处理 thinking/redacted_thinking/无 type 字段的 thinking block
- 保留 thinking 内容，避免上下文丢失

											
										
										
											2026-01-03 17:07:54 -08:00
+										return body
 									}
-												perf(service): 优化重试场景 thinking 过滤性能

- 避免全量 Unmarshal 请求体，改为仅解析 messages 子树

- 顶层 thinking 使用 sjson 直接删除，减少整体重写

- content 仅在需要修改时延迟分配 new slice

- 增加 FilterThinkingBlocksForRetry 基准测试

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

											
										
										
											2026-02-11 00:47:26 +08:00
+									// 尽量避免把整个 body Unmarshal 成 map（会产生大量 map/接口分配）。
 									// 这里先用 gjson 把 messages 子树摘出来，后续只对 messages 做 Unmarshal/Marshal。
 									jsonStr := *(*string)(unsafe.Pointer(&body))
 									msgsRes := gjson.Get(jsonStr, "messages")
 									if !msgsRes.Exists() || !msgsRes.IsArray() {
-												fix(thinking): 优化 thinking block 签名错误重试逻辑

- FilterThinkingBlocksForRetry: 将 thinking block 转换为 text block 而非直接删除
- stripThinkingFromClaudeRequest: Antigravity 网关同步采用转换策略
- 统一处理 thinking/redacted_thinking/无 type 字段的 thinking block
- 保留 thinking 内容，避免上下文丢失

											
										
										
											2026-01-03 17:07:54 -08:00
+										return body
 									}
-												perf(service): 优化重试场景 thinking 过滤性能

- 避免全量 Unmarshal 请求体，改为仅解析 messages 子树

- 顶层 thinking 使用 sjson 直接删除，减少整体重写

- content 仅在需要修改时延迟分配 new slice

- 增加 FilterThinkingBlocksForRetry 基准测试

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

											
										
										
											2026-02-11 00:47:26 +08:00
+									// Fast path：只需要删除顶层 thinking，不需要改 messages。
 									// 注意：patternThinkingField 可能来自嵌套字段（如 tool_use.input.thinking），因此必须用 gjson 判断顶层字段是否存在。
 									containsThinkingBlocks := bytes.Contains(body, patternTypeThinking) ||
 										bytes.Contains(body, patternTypeThinkingSpaced) ||
 										bytes.Contains(body, patternTypeRedactedThinking) ||
 										bytes.Contains(body, patternTypeRedactedSpaced) ||
 										bytes.Contains(body, patternThinkingFieldSpaced)
 									if !hasEmptyContent && !containsThinkingBlocks {
 										if topThinking := gjson.Get(jsonStr, "thinking"); topThinking.Exists() {
 											if out, err := sjson.DeleteBytes(body, "thinking"); err == nil {
-												add test file

											
										
										
											2026-03-08 21:08:09 +08:00
+												out = removeThinkingDependentContextStrategies(out)
-												perf(service): 优化重试场景 thinking 过滤性能

- 避免全量 Unmarshal 请求体，改为仅解析 messages 子树

- 顶层 thinking 使用 sjson 直接删除，减少整体重写

- content 仅在需要修改时延迟分配 new slice

- 增加 FilterThinkingBlocksForRetry 基准测试

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

											
										
										
											2026-02-11 00:47:26 +08:00
+												return out
 											}
 											return body
 										}
-												fix(thinking): 优化 thinking block 签名错误重试逻辑

- FilterThinkingBlocksForRetry: 将 thinking block 转换为 text block 而非直接删除
- stripThinkingFromClaudeRequest: Antigravity 网关同步采用转换策略
- 统一处理 thinking/redacted_thinking/无 type 字段的 thinking block
- 保留 thinking 内容，避免上下文丢失

											
										
										
											2026-01-03 17:07:54 -08:00
+										return body
 									}
-												perf(service): 优化重试场景 thinking 过滤性能

- 避免全量 Unmarshal 请求体，改为仅解析 messages 子树

- 顶层 thinking 使用 sjson 直接删除，减少整体重写

- content 仅在需要修改时延迟分配 new slice

- 增加 FilterThinkingBlocksForRetry 基准测试

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

											
										
										
											2026-02-11 00:47:26 +08:00
+									var messages []any
 									if err := json.Unmarshal(sliceRawFromBody(body, msgsRes), &messages); err != nil {
 										return body
-												fix(backend): 改进 thinking/tool block 签名处理和重试策略

主要改动：
- request_transformer: thinking block 缺少签名时降级为文本而非丢弃，保留内容并在上层禁用 thinking mode
- antigravity_gateway_service: 新增两阶段降级策略，先处理 thinking blocks，如仍失败且涉及 tool 签名错误则进一步降级 tool blocks
- gateway_request: 新增 FilterSignatureSensitiveBlocksForRetry 函数，支持将 tool_use/tool_result 降级为文本
- gateway_request: 改进 FilterThinkingBlocksForRetry，禁用顶层 thinking 配置以避免结构约束冲突
- gateway_service: 实现保守的两阶段重试逻辑，优先保留内容，仅在必要时降级工具调用
- 新增 antigravity_gateway_service_test.go 测试签名块剥离逻辑
- 更新相关测试用例以验证降级行为

此修复解决了跨平台/账户切换时历史消息签名失效导致的请求失败问题。

											
										
										
											2026-01-04 22:32:36 +08:00
+									}
-												perf(service): 优化重试场景 thinking 过滤性能

- 避免全量 Unmarshal 请求体，改为仅解析 messages 子树

- 顶层 thinking 使用 sjson 直接删除，减少整体重写

- content 仅在需要修改时延迟分配 new slice

- 增加 FilterThinkingBlocksForRetry 基准测试

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

											
										
										
											2026-02-11 00:47:26 +08:00
+									modified := false
 									// Disable top-level thinking mode for retry to avoid structural/signature constraints upstream.
 									deleteTopLevelThinking := gjson.Get(jsonStr, "thinking").Exists()
-												fix(gateway): 优化 thinking block 重试逻辑

- 保留用户的 thinking.type=enabled 设置（不再禁用）
- 只移除历史消息中的 thinking/redacted_thinking blocks
- 处理过滤后空消息：跳过 assistant 消息，user 消息添加占位符
- 增强错误检测：覆盖 signature、Expected thinking、empty content 错误
- 添加重试成功/失败日志便于排查

											
										
										
											2026-01-03 18:05:15 -08:00
-												perf(service): 优化重试场景 thinking 过滤性能

- 避免全量 Unmarshal 请求体，改为仅解析 messages 子树

- 顶层 thinking 使用 sjson 直接删除，减少整体重写

- content 仅在需要修改时延迟分配 new slice

- 增加 FilterThinkingBlocksForRetry 基准测试

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

											
										
										
											2026-02-11 00:47:26 +08:00
+									for i := 0; i < len(messages); i++ {
 										msgMap, ok := messages[i].(map[string]any)
-												fix(thinking): 优化 thinking block 签名错误重试逻辑

- FilterThinkingBlocksForRetry: 将 thinking block 转换为 text block 而非直接删除
- stripThinkingFromClaudeRequest: Antigravity 网关同步采用转换策略
- 统一处理 thinking/redacted_thinking/无 type 字段的 thinking block
- 保留 thinking 内容，避免上下文丢失

											
										
										
											2026-01-03 17:07:54 -08:00
+										if !ok {
 											continue
 										}
-												fix(gateway): 优化 thinking block 重试逻辑

- 保留用户的 thinking.type=enabled 设置（不再禁用）
- 只移除历史消息中的 thinking/redacted_thinking blocks
- 处理过滤后空消息：跳过 assistant 消息，user 消息添加占位符
- 增强错误检测：覆盖 signature、Expected thinking、empty content 错误
- 添加重试成功/失败日志便于排查

											
										
										
											2026-01-03 18:05:15 -08:00
+										role, _ := msgMap["role"].(string)
-												fix(thinking): 优化 thinking block 签名错误重试逻辑

- FilterThinkingBlocksForRetry: 将 thinking block 转换为 text block 而非直接删除
- stripThinkingFromClaudeRequest: Antigravity 网关同步采用转换策略
- 统一处理 thinking/redacted_thinking/无 type 字段的 thinking block
- 保留 thinking 内容，避免上下文丢失

											
										
										
											2026-01-03 17:07:54 -08:00
+										content, ok := msgMap["content"].([]any)
 										if !ok {
-												fix(gateway): 优化 thinking block 重试逻辑

- 保留用户的 thinking.type=enabled 设置（不再禁用）
- 只移除历史消息中的 thinking/redacted_thinking blocks
- 处理过滤后空消息：跳过 assistant 消息，user 消息添加占位符
- 增强错误检测：覆盖 signature、Expected thinking、empty content 错误
- 添加重试成功/失败日志便于排查

											
										
										
											2026-01-03 18:05:15 -08:00
+											// String content or other format - keep as is
-												fix(thinking): 优化 thinking block 签名错误重试逻辑

- FilterThinkingBlocksForRetry: 将 thinking block 转换为 text block 而非直接删除
- stripThinkingFromClaudeRequest: Antigravity 网关同步采用转换策略
- 统一处理 thinking/redacted_thinking/无 type 字段的 thinking block
- 保留 thinking 内容，避免上下文丢失

											
										
										
											2026-01-03 17:07:54 -08:00
+											continue
 										}
-												perf(service): 优化重试场景 thinking 过滤性能

- 避免全量 Unmarshal 请求体，改为仅解析 messages 子树

- 顶层 thinking 使用 sjson 直接删除，减少整体重写

- content 仅在需要修改时延迟分配 new slice

- 增加 FilterThinkingBlocksForRetry 基准测试

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

											
										
										
											2026-02-11 00:47:26 +08:00
+										// 延迟分配：只有检测到需要修改的块，才构建新 slice。
 										var newContent []any
-												fix(thinking): 优化 thinking block 签名错误重试逻辑

- FilterThinkingBlocksForRetry: 将 thinking block 转换为 text block 而非直接删除
- stripThinkingFromClaudeRequest: Antigravity 网关同步采用转换策略
- 统一处理 thinking/redacted_thinking/无 type 字段的 thinking block
- 保留 thinking 内容，避免上下文丢失

											
										
										
											2026-01-03 17:07:54 -08:00
+										modifiedThisMsg := false
-												perf(service): 优化重试场景 thinking 过滤性能

- 避免全量 Unmarshal 请求体，改为仅解析 messages 子树

- 顶层 thinking 使用 sjson 直接删除，减少整体重写

- content 仅在需要修改时延迟分配 new slice

- 增加 FilterThinkingBlocksForRetry 基准测试

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

											
										
										
											2026-02-11 00:47:26 +08:00
+										ensureNewContent := func(prefixLen int) {
 											if newContent != nil {
 												return
 											}
 											newContent = make([]any, 0, len(content))
 											if prefixLen > 0 {
 												newContent = append(newContent, content[:prefixLen]...)
 											}
 										}
 										for bi := 0; bi < len(content); bi++ {
 											block := content[bi]
-												fix(thinking): 优化 thinking block 签名错误重试逻辑

- FilterThinkingBlocksForRetry: 将 thinking block 转换为 text block 而非直接删除
- stripThinkingFromClaudeRequest: Antigravity 网关同步采用转换策略
- 统一处理 thinking/redacted_thinking/无 type 字段的 thinking block
- 保留 thinking 内容，避免上下文丢失

											
										
										
											2026-01-03 17:07:54 -08:00
+											blockMap, ok := block.(map[string]any)
 											if !ok {
-												perf(service): 优化重试场景 thinking 过滤性能

- 避免全量 Unmarshal 请求体，改为仅解析 messages 子树

- 顶层 thinking 使用 sjson 直接删除，减少整体重写

- content 仅在需要修改时延迟分配 new slice

- 增加 FilterThinkingBlocksForRetry 基准测试

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

											
										
										
											2026-02-11 00:47:26 +08:00
+												if newContent != nil {
 													newContent = append(newContent, block)
 												}
-												fix(thinking): 优化 thinking block 签名错误重试逻辑

- FilterThinkingBlocksForRetry: 将 thinking block 转换为 text block 而非直接删除
- stripThinkingFromClaudeRequest: Antigravity 网关同步采用转换策略
- 统一处理 thinking/redacted_thinking/无 type 字段的 thinking block
- 保留 thinking 内容，避免上下文丢失

											
										
										
											2026-01-03 17:07:54 -08:00
+												continue
 											}
 											blockType, _ := blockMap["type"].(string)
-												fix(backend): 改进 thinking/tool block 签名处理和重试策略

主要改动：
- request_transformer: thinking block 缺少签名时降级为文本而非丢弃，保留内容并在上层禁用 thinking mode
- antigravity_gateway_service: 新增两阶段降级策略，先处理 thinking blocks，如仍失败且涉及 tool 签名错误则进一步降级 tool blocks
- gateway_request: 新增 FilterSignatureSensitiveBlocksForRetry 函数，支持将 tool_use/tool_result 降级为文本
- gateway_request: 改进 FilterThinkingBlocksForRetry，禁用顶层 thinking 配置以避免结构约束冲突
- gateway_service: 实现保守的两阶段重试逻辑，优先保留内容，仅在必要时降级工具调用
- 新增 antigravity_gateway_service_test.go 测试签名块剥离逻辑
- 更新相关测试用例以验证降级行为

此修复解决了跨平台/账户切换时历史消息签名失效导致的请求失败问题。

											
										
										
											2026-01-04 22:32:36 +08:00
+											// Convert thinking blocks to text (preserve content) and drop redacted_thinking.
 											switch blockType {
 											case "thinking":
 												modifiedThisMsg = true
-												perf(service): 优化重试场景 thinking 过滤性能

- 避免全量 Unmarshal 请求体，改为仅解析 messages 子树

- 顶层 thinking 使用 sjson 直接删除，减少整体重写

- content 仅在需要修改时延迟分配 new slice

- 增加 FilterThinkingBlocksForRetry 基准测试

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

											
										
										
											2026-02-11 00:47:26 +08:00
+												ensureNewContent(bi)
-												fix(backend): 改进 thinking/tool block 签名处理和重试策略

主要改动：
- request_transformer: thinking block 缺少签名时降级为文本而非丢弃，保留内容并在上层禁用 thinking mode
- antigravity_gateway_service: 新增两阶段降级策略，先处理 thinking blocks，如仍失败且涉及 tool 签名错误则进一步降级 tool blocks
- gateway_request: 新增 FilterSignatureSensitiveBlocksForRetry 函数，支持将 tool_use/tool_result 降级为文本
- gateway_request: 改进 FilterThinkingBlocksForRetry，禁用顶层 thinking 配置以避免结构约束冲突
- gateway_service: 实现保守的两阶段重试逻辑，优先保留内容，仅在必要时降级工具调用
- 新增 antigravity_gateway_service_test.go 测试签名块剥离逻辑
- 更新相关测试用例以验证降级行为

此修复解决了跨平台/账户切换时历史消息签名失效导致的请求失败问题。

											
										
										
											2026-01-04 22:32:36 +08:00
+												thinkingText, _ := blockMap["thinking"].(string)
-												perf(service): 优化重试场景 thinking 过滤性能

- 避免全量 Unmarshal 请求体，改为仅解析 messages 子树

- 顶层 thinking 使用 sjson 直接删除，减少整体重写

- content 仅在需要修改时延迟分配 new slice

- 增加 FilterThinkingBlocksForRetry 基准测试

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

											
										
										
											2026-02-11 00:47:26 +08:00
+												if thinkingText != "" {
 													newContent = append(newContent, map[string]any{"type": "text", "text": thinkingText})
-												fix(backend): 改进 thinking/tool block 签名处理和重试策略

主要改动：
- request_transformer: thinking block 缺少签名时降级为文本而非丢弃，保留内容并在上层禁用 thinking mode
- antigravity_gateway_service: 新增两阶段降级策略，先处理 thinking blocks，如仍失败且涉及 tool 签名错误则进一步降级 tool blocks
- gateway_request: 新增 FilterSignatureSensitiveBlocksForRetry 函数，支持将 tool_use/tool_result 降级为文本
- gateway_request: 改进 FilterThinkingBlocksForRetry，禁用顶层 thinking 配置以避免结构约束冲突
- gateway_service: 实现保守的两阶段重试逻辑，优先保留内容，仅在必要时降级工具调用
- 新增 antigravity_gateway_service_test.go 测试签名块剥离逻辑
- 更新相关测试用例以验证降级行为

此修复解决了跨平台/账户切换时历史消息签名失效导致的请求失败问题。

											
										
										
											2026-01-04 22:32:36 +08:00
+												}
 												continue
 											case "redacted_thinking":
-												fix(thinking): 优化 thinking block 签名错误重试逻辑

- FilterThinkingBlocksForRetry: 将 thinking block 转换为 text block 而非直接删除
- stripThinkingFromClaudeRequest: Antigravity 网关同步采用转换策略
- 统一处理 thinking/redacted_thinking/无 type 字段的 thinking block
- 保留 thinking 内容，避免上下文丢失

											
										
										
											2026-01-03 17:07:54 -08:00
+												modifiedThisMsg = true
-												perf(service): 优化重试场景 thinking 过滤性能

- 避免全量 Unmarshal 请求体，改为仅解析 messages 子树

- 顶层 thinking 使用 sjson 直接删除，减少整体重写

- content 仅在需要修改时延迟分配 new slice

- 增加 FilterThinkingBlocksForRetry 基准测试

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

											
										
										
											2026-02-11 00:47:26 +08:00
+												ensureNewContent(bi)
-												fix(thinking): 优化 thinking block 签名错误重试逻辑

- FilterThinkingBlocksForRetry: 将 thinking block 转换为 text block 而非直接删除
- stripThinkingFromClaudeRequest: Antigravity 网关同步采用转换策略
- 统一处理 thinking/redacted_thinking/无 type 字段的 thinking block
- 保留 thinking 内容，避免上下文丢失

											
										
										
											2026-01-03 17:07:54 -08:00
+												continue
 											}
-												fix(backend): 改进 thinking/tool block 签名处理和重试策略

主要改动：
- request_transformer: thinking block 缺少签名时降级为文本而非丢弃，保留内容并在上层禁用 thinking mode
- antigravity_gateway_service: 新增两阶段降级策略，先处理 thinking blocks，如仍失败且涉及 tool 签名错误则进一步降级 tool blocks
- gateway_request: 新增 FilterSignatureSensitiveBlocksForRetry 函数，支持将 tool_use/tool_result 降级为文本
- gateway_request: 改进 FilterThinkingBlocksForRetry，禁用顶层 thinking 配置以避免结构约束冲突
- gateway_service: 实现保守的两阶段重试逻辑，优先保留内容，仅在必要时降级工具调用
- 新增 antigravity_gateway_service_test.go 测试签名块剥离逻辑
- 更新相关测试用例以验证降级行为

此修复解决了跨平台/账户切换时历史消息签名失效导致的请求失败问题。

											
										
										
											2026-01-04 22:32:36 +08:00
+											// Handle blocks without type discriminator but with a "thinking" field.
 											if blockType == "" {
 												if rawThinking, hasThinking := blockMap["thinking"]; hasThinking {
 													modifiedThisMsg = true
-												perf(service): 优化重试场景 thinking 过滤性能

- 避免全量 Unmarshal 请求体，改为仅解析 messages 子树

- 顶层 thinking 使用 sjson 直接删除，减少整体重写

- content 仅在需要修改时延迟分配 new slice

- 增加 FilterThinkingBlocksForRetry 基准测试

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

											
										
										
											2026-02-11 00:47:26 +08:00
+													ensureNewContent(bi)
-												fix(backend): 改进 thinking/tool block 签名处理和重试策略

主要改动：
- request_transformer: thinking block 缺少签名时降级为文本而非丢弃，保留内容并在上层禁用 thinking mode
- antigravity_gateway_service: 新增两阶段降级策略，先处理 thinking blocks，如仍失败且涉及 tool 签名错误则进一步降级 tool blocks
- gateway_request: 新增 FilterSignatureSensitiveBlocksForRetry 函数，支持将 tool_use/tool_result 降级为文本
- gateway_request: 改进 FilterThinkingBlocksForRetry，禁用顶层 thinking 配置以避免结构约束冲突
- gateway_service: 实现保守的两阶段重试逻辑，优先保留内容，仅在必要时降级工具调用
- 新增 antigravity_gateway_service_test.go 测试签名块剥离逻辑
- 更新相关测试用例以验证降级行为

此修复解决了跨平台/账户切换时历史消息签名失效导致的请求失败问题。

											
										
										
											2026-01-04 22:32:36 +08:00
+													switch v := rawThinking.(type) {
 													case string:
 														if v != "" {
 															newContent = append(newContent, map[string]any{"type": "text", "text": v})
 														}
 													default:
 														if b, err := json.Marshal(v); err == nil && len(b) > 0 {
 															newContent = append(newContent, map[string]any{"type": "text", "text": string(b)})
 														}
 													}
 													continue
 												}
 											}
-												perf(service): 优化重试场景 thinking 过滤性能

- 避免全量 Unmarshal 请求体，改为仅解析 messages 子树

- 顶层 thinking 使用 sjson 直接删除，减少整体重写

- content 仅在需要修改时延迟分配 new slice

- 增加 FilterThinkingBlocksForRetry 基准测试

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

											
										
										
											2026-02-11 00:47:26 +08:00
+											if newContent != nil {
 												newContent = append(newContent, block)
 											}
-												fix(thinking): 优化 thinking block 签名错误重试逻辑

- FilterThinkingBlocksForRetry: 将 thinking block 转换为 text block 而非直接删除
- stripThinkingFromClaudeRequest: Antigravity 网关同步采用转换策略
- 统一处理 thinking/redacted_thinking/无 type 字段的 thinking block
- 保留 thinking 内容，避免上下文丢失

											
										
										
											2026-01-03 17:07:54 -08:00
+										}
-												fix: 修复空content处理及更新Gemini使用指南链接

- 修复FilterThinkingBlocksForRetry对空content数组的处理
- docker-compose添加SECURITY_URL_ALLOWLIST_UPSTREAM_HOSTS配置
- 更新Gemini使用指南链接：检查归属地、修改归属地、激活Gemini Web

											
										
										
											2026-01-04 18:26:39 -08:00
+										// Handle empty content: either from filtering or originally empty
-												perf(service): 优化重试场景 thinking 过滤性能

- 避免全量 Unmarshal 请求体，改为仅解析 messages 子树

- 顶层 thinking 使用 sjson 直接删除，减少整体重写

- content 仅在需要修改时延迟分配 new slice

- 增加 FilterThinkingBlocksForRetry 基准测试

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

											
										
										
											2026-02-11 00:47:26 +08:00
+										if newContent == nil {
 											if len(content) == 0 {
 												modified = true
 												placeholder := "(content removed)"
 												if role == "assistant" {
 													placeholder = "(assistant content removed)"
 												}
 												msgMap["content"] = []any{map[string]any{"type": "text", "text": placeholder}}
 											}
 											continue
 										}
-												fix: 修复空content处理及更新Gemini使用指南链接

- 修复FilterThinkingBlocksForRetry对空content数组的处理
- docker-compose添加SECURITY_URL_ALLOWLIST_UPSTREAM_HOSTS配置
- 更新Gemini使用指南链接：检查归属地、修改归属地、激活Gemini Web

											
										
										
											2026-01-04 18:26:39 -08:00
+										if len(newContent) == 0 {
-												fix(thinking): 优化 thinking block 签名错误重试逻辑

- FilterThinkingBlocksForRetry: 将 thinking block 转换为 text block 而非直接删除
- stripThinkingFromClaudeRequest: Antigravity 网关同步采用转换策略
- 统一处理 thinking/redacted_thinking/无 type 字段的 thinking block
- 保留 thinking 内容，避免上下文丢失

											
										
										
											2026-01-03 17:07:54 -08:00
+											modified = true
-												fix: 修复空content处理及更新Gemini使用指南链接

- 修复FilterThinkingBlocksForRetry对空content数组的处理
- docker-compose添加SECURITY_URL_ALLOWLIST_UPSTREAM_HOSTS配置
- 更新Gemini使用指南链接：检查归属地、修改归属地、激活Gemini Web

											
										
										
											2026-01-04 18:26:39 -08:00
+											placeholder := "(content removed)"
 											if role == "assistant" {
 												placeholder = "(assistant content removed)"
-												fix(gateway): 优化 thinking block 重试逻辑

- 保留用户的 thinking.type=enabled 设置（不再禁用）
- 只移除历史消息中的 thinking/redacted_thinking blocks
- 处理过滤后空消息：跳过 assistant 消息，user 消息添加占位符
- 增强错误检测：覆盖 signature、Expected thinking、empty content 错误
- 添加重试成功/失败日志便于排查

											
										
										
											2026-01-03 18:05:15 -08:00
+											}
-												perf(service): 优化重试场景 thinking 过滤性能

- 避免全量 Unmarshal 请求体，改为仅解析 messages 子树

- 顶层 thinking 使用 sjson 直接删除，减少整体重写

- content 仅在需要修改时延迟分配 new slice

- 增加 FilterThinkingBlocksForRetry 基准测试

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

											
										
										
											2026-02-11 00:47:26 +08:00
+											msgMap["content"] = []any{map[string]any{"type": "text", "text": placeholder}}
 											continue
 										}
 										if modifiedThisMsg {
-												fix: 修复空content处理及更新Gemini使用指南链接

- 修复FilterThinkingBlocksForRetry对空content数组的处理
- docker-compose添加SECURITY_URL_ALLOWLIST_UPSTREAM_HOSTS配置
- 更新Gemini使用指南链接：检查归属地、修改归属地、激活Gemini Web

											
										
										
											2026-01-04 18:26:39 -08:00
+											modified = true
-												fix(gateway): 优化 thinking block 重试逻辑

- 保留用户的 thinking.type=enabled 设置（不再禁用）
- 只移除历史消息中的 thinking/redacted_thinking blocks
- 处理过滤后空消息：跳过 assistant 消息，user 消息添加占位符
- 增强错误检测：覆盖 signature、Expected thinking、empty content 错误
- 添加重试成功/失败日志便于排查

											
										
										
											2026-01-03 18:05:15 -08:00
+											msgMap["content"] = newContent
-												fix(thinking): 优化 thinking block 签名错误重试逻辑

- FilterThinkingBlocksForRetry: 将 thinking block 转换为 text block 而非直接删除
- stripThinkingFromClaudeRequest: Antigravity 网关同步采用转换策略
- 统一处理 thinking/redacted_thinking/无 type 字段的 thinking block
- 保留 thinking 内容，避免上下文丢失

											
										
										
											2026-01-03 17:07:54 -08:00
+										}
 									}
-												perf(service): 优化重试场景 thinking 过滤性能

- 避免全量 Unmarshal 请求体，改为仅解析 messages 子树

- 顶层 thinking 使用 sjson 直接删除，减少整体重写

- content 仅在需要修改时延迟分配 new slice

- 增加 FilterThinkingBlocksForRetry 基准测试

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

											
										
										
											2026-02-11 00:47:26 +08:00
+									if !modified && !deleteTopLevelThinking {
-												fix(backend): 改进 thinking/tool block 签名处理和重试策略

主要改动：
- request_transformer: thinking block 缺少签名时降级为文本而非丢弃，保留内容并在上层禁用 thinking mode
- antigravity_gateway_service: 新增两阶段降级策略，先处理 thinking blocks，如仍失败且涉及 tool 签名错误则进一步降级 tool blocks
- gateway_request: 新增 FilterSignatureSensitiveBlocksForRetry 函数，支持将 tool_use/tool_result 降级为文本
- gateway_request: 改进 FilterThinkingBlocksForRetry，禁用顶层 thinking 配置以避免结构约束冲突
- gateway_service: 实现保守的两阶段重试逻辑，优先保留内容，仅在必要时降级工具调用
- 新增 antigravity_gateway_service_test.go 测试签名块剥离逻辑
- 更新相关测试用例以验证降级行为

此修复解决了跨平台/账户切换时历史消息签名失效导致的请求失败问题。

											
										
										
											2026-01-04 22:32:36 +08:00
+										// Avoid rewriting JSON when no changes are needed.
 										return body
 									}
-												perf(service): 优化重试场景 thinking 过滤性能

- 避免全量 Unmarshal 请求体，改为仅解析 messages 子树

- 顶层 thinking 使用 sjson 直接删除，减少整体重写

- content 仅在需要修改时延迟分配 new slice

- 增加 FilterThinkingBlocksForRetry 基准测试

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

											
										
										
											2026-02-11 00:47:26 +08:00
+									out := body
 									if deleteTopLevelThinking {
 										if b, err := sjson.DeleteBytes(out, "thinking"); err == nil {
 											out = b
 										} else {
 											return body
 										}
-												fix issue #851

											
										
										
											2026-03-08 21:00:34 +08:00
+										// Removing "thinking" makes any context_management strategy that requires it invalid
 										// (e.g. clear_thinking_20251015).  Strip those entries so the retry request does not
 										// receive a 400 "strategy requires thinking to be enabled or adaptive".
 										out = removeThinkingDependentContextStrategies(out)
-												perf(service): 优化重试场景 thinking 过滤性能

- 避免全量 Unmarshal 请求体，改为仅解析 messages 子树

- 顶层 thinking 使用 sjson 直接删除，减少整体重写

- content 仅在需要修改时延迟分配 new slice

- 增加 FilterThinkingBlocksForRetry 基准测试

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

											
										
										
											2026-02-11 00:47:26 +08:00
+									}
 									if modified {
 										msgsBytes, err := json.Marshal(messages)
 										if err != nil {
 											return body
 										}
 										out, err = sjson.SetRawBytes(out, "messages", msgsBytes)
 										if err != nil {
 											return body
 										}
-												fix(backend): 改进 thinking/tool block 签名处理和重试策略

主要改动：
- request_transformer: thinking block 缺少签名时降级为文本而非丢弃，保留内容并在上层禁用 thinking mode
- antigravity_gateway_service: 新增两阶段降级策略，先处理 thinking blocks，如仍失败且涉及 tool 签名错误则进一步降级 tool blocks
- gateway_request: 新增 FilterSignatureSensitiveBlocksForRetry 函数，支持将 tool_use/tool_result 降级为文本
- gateway_request: 改进 FilterThinkingBlocksForRetry，禁用顶层 thinking 配置以避免结构约束冲突
- gateway_service: 实现保守的两阶段重试逻辑，优先保留内容，仅在必要时降级工具调用
- 新增 antigravity_gateway_service_test.go 测试签名块剥离逻辑
- 更新相关测试用例以验证降级行为

此修复解决了跨平台/账户切换时历史消息签名失效导致的请求失败问题。

											
										
										
											2026-01-04 22:32:36 +08:00
+									}
-												perf(service): 优化重试场景 thinking 过滤性能

- 避免全量 Unmarshal 请求体，改为仅解析 messages 子树

- 顶层 thinking 使用 sjson 直接删除，减少整体重写

- content 仅在需要修改时延迟分配 new slice

- 增加 FilterThinkingBlocksForRetry 基准测试

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

											
										
										
											2026-02-11 00:47:26 +08:00
+									return out
-												fix(backend): 改进 thinking/tool block 签名处理和重试策略

主要改动：
- request_transformer: thinking block 缺少签名时降级为文本而非丢弃，保留内容并在上层禁用 thinking mode
- antigravity_gateway_service: 新增两阶段降级策略，先处理 thinking blocks，如仍失败且涉及 tool 签名错误则进一步降级 tool blocks
- gateway_request: 新增 FilterSignatureSensitiveBlocksForRetry 函数，支持将 tool_use/tool_result 降级为文本
- gateway_request: 改进 FilterThinkingBlocksForRetry，禁用顶层 thinking 配置以避免结构约束冲突
- gateway_service: 实现保守的两阶段重试逻辑，优先保留内容，仅在必要时降级工具调用
- 新增 antigravity_gateway_service_test.go 测试签名块剥离逻辑
- 更新相关测试用例以验证降级行为

此修复解决了跨平台/账户切换时历史消息签名失效导致的请求失败问题。

											
										
										
											2026-01-04 22:32:36 +08:00
+								}
-												fix issue #851

											
										
										
											2026-03-08 21:00:34 +08:00
+								// removeThinkingDependentContextStrategies 从 context_management.edits 中移除
 								// 需要 thinking 启用的策略（如 clear_thinking_20251015）。
 								// 当顶层 "thinking" 字段被禁用时必须调用，否则上游会返回
 								// "strategy requires thinking to be enabled or adaptive"。
 								func removeThinkingDependentContextStrategies(body []byte) []byte {
 									// Contract: drop every element of context_management.edits whose "type" is
 									// clear_thinking_20251015 and return the rewritten JSON. The input slice is
 									// returned untouched when the edits array is missing, is not an array,
 									// contains no such element, or when any rewrite step reports an error.
 									const (
 										editsPath            = "context_management.edits"
 										thinkingOnlyStrategy = "clear_thinking_20251015"
 									)

 									// Reinterpret the byte slice as a string without copying so gjson can scan it.
 									raw := *(*string)(unsafe.Pointer(&body))
 									edits := gjson.Get(raw, editsPath)
 									if !(edits.Exists() && edits.IsArray()) {
 										return body
 									}

 									// Collect the raw JSON of every edit that survives; count the ones we drop.
 									var kept []json.RawMessage
 									dropped := 0
 									edits.ForEach(func(_, edit gjson.Result) bool {
 										if edit.Get("type").String() != thinkingOnlyStrategy {
 											kept = append(kept, json.RawMessage(edit.Raw))
 										} else {
 											dropped++
 										}
 										return true
 									})
 									if dropped == 0 {
 										// Nothing to strip: hand back the original bytes unchanged.
 										return body
 									}

 									var (
 										rewritten []byte
 										err       error
 									)
 									if len(kept) == 0 {
 										// Every edit was removed, so delete the "edits" key itself
 										// rather than writing back an empty array.
 										rewritten, err = sjson.DeleteBytes(body, editsPath)
 									} else {
 										encoded, marshalErr := json.Marshal(kept)
 										if marshalErr != nil {
 											return body
 										}
 										rewritten, err = sjson.SetRawBytes(body, editsPath, encoded)
 									}
 									if err != nil {
 										// Best effort: fall back to the unmodified request on any sjson failure.
 										return body
 									}
 									return rewritten
 								}
-												fix(backend): 改进 thinking/tool block 签名处理和重试策略

主要改动：
- request_transformer: thinking block 缺少签名时降级为文本而非丢弃，保留内容并在上层禁用 thinking mode
- antigravity_gateway_service: 新增两阶段降级策略，先处理 thinking blocks，如仍失败且涉及 tool 签名错误则进一步降级 tool blocks
- gateway_request: 新增 FilterSignatureSensitiveBlocksForRetry 函数，支持将 tool_use/tool_result 降级为文本
- gateway_request: 改进 FilterThinkingBlocksForRetry，禁用顶层 thinking 配置以避免结构约束冲突
- gateway_service: 实现保守的两阶段重试逻辑，优先保留内容，仅在必要时降级工具调用
- 新增 antigravity_gateway_service_test.go 测试签名块剥离逻辑
- 更新相关测试用例以验证降级行为

此修复解决了跨平台/账户切换时历史消息签名失效导致的请求失败问题。

											
										
										
											2026-01-04 22:32:36 +08:00
+								// FilterSignatureSensitiveBlocksForRetry is a stronger retry filter for cases where upstream errors indicate
 								// signature/thought_signature validation issues involving tool blocks.
 								//
 								// This performs everything in FilterThinkingBlocksForRetry, plus:
 								//   - Convert `tool_use` blocks to text (name/id/input) so we stop sending structured tool calls.
 								//   - Convert `tool_result` blocks to text so we keep tool results visible without tool semantics.
 								//
 								// Use this only when needed: converting tool blocks to text changes model behaviour and can increase the
 								// risk of prompt injection (tool output becomes plain conversation text).
 								func FilterSignatureSensitiveBlocksForRetry(body []byte) []byte {
 									// Fast path: only run when we see likely relevant constructs.
 									if !bytes.Contains(body, []byte(`"type":"thinking"`)) &&
 										!bytes.Contains(body, []byte(`"type": "thinking"`)) &&
 										!bytes.Contains(body, []byte(`"type":"redacted_thinking"`)) &&
 										!bytes.Contains(body, []byte(`"type": "redacted_thinking"`)) &&
 										!bytes.Contains(body, []byte(`"type":"tool_use"`)) &&
 										!bytes.Contains(body, []byte(`"type": "tool_use"`)) &&
 										!bytes.Contains(body, []byte(`"type":"tool_result"`)) &&
 										!bytes.Contains(body, []byte(`"type": "tool_result"`)) &&
 										!bytes.Contains(body, []byte(`"thinking":`)) &&
 										!bytes.Contains(body, []byte(`"thinking" :`)) {
 										return body
 									}
 									var req map[string]any
 									if err := json.Unmarshal(body, &req); err != nil {
 										return body
 									}
 									modified := false
 									// Disable top-level thinking for retry to avoid structural/signature constraints upstream.
 									if _, exists := req["thinking"]; exists {
 										delete(req, "thinking")
 										modified = true
-												fix issue #851

											
										
										
											2026-03-08 21:00:34 +08:00
+										// Remove context_management strategies that require thinking to be enabled
 										// (e.g. clear_thinking_20251015), otherwise upstream returns 400.
 										if cm, ok := req["context_management"].(map[string]any); ok {
 											if edits, ok := cm["edits"].([]any); ok {
 												filtered := make([]any, 0, len(edits))
 												for _, edit := range edits {
 													if editMap, ok := edit.(map[string]any); ok {
 														if editMap["type"] == "clear_thinking_20251015" {
 															continue
 														}
 													}
 													filtered = append(filtered, edit)
 												}
 												if len(filtered) != len(edits) {
-												Update backend/internal/service/gateway_request.go

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
											
										
										
											2026-03-08 21:21:28 +08:00
+													if len(filtered) == 0 {
 														delete(cm, "edits")
 													} else {
 														cm["edits"] = filtered
 													}
-												fix issue #851

											
										
										
											2026-03-08 21:00:34 +08:00
+												}
 											}
 										}
-												fix(backend): 改进 thinking/tool block 签名处理和重试策略

主要改动：
- request_transformer: thinking block 缺少签名时降级为文本而非丢弃，保留内容并在上层禁用 thinking mode
- antigravity_gateway_service: 新增两阶段降级策略，先处理 thinking blocks，如仍失败且涉及 tool 签名错误则进一步降级 tool blocks
- gateway_request: 新增 FilterSignatureSensitiveBlocksForRetry 函数，支持将 tool_use/tool_result 降级为文本
- gateway_request: 改进 FilterThinkingBlocksForRetry，禁用顶层 thinking 配置以避免结构约束冲突
- gateway_service: 实现保守的两阶段重试逻辑，优先保留内容，仅在必要时降级工具调用
- 新增 antigravity_gateway_service_test.go 测试签名块剥离逻辑
- 更新相关测试用例以验证降级行为

此修复解决了跨平台/账户切换时历史消息签名失效导致的请求失败问题。

											
										
										
											2026-01-04 22:32:36 +08:00
+									}
 									messages, ok := req["messages"].([]any)
 									if !ok {
 										return body
 									}
 									newMessages := make([]any, 0, len(messages))
 									for _, msg := range messages {
 										msgMap, ok := msg.(map[string]any)
 										if !ok {
 											newMessages = append(newMessages, msg)
 											continue
 										}
 										role, _ := msgMap["role"].(string)
 										content, ok := msgMap["content"].([]any)
 										if !ok {
 											newMessages = append(newMessages, msg)
 											continue
 										}
 										newContent := make([]any, 0, len(content))
 										modifiedThisMsg := false
 										for _, block := range content {
 											blockMap, ok := block.(map[string]any)
 											if !ok {
 												newContent = append(newContent, block)
 												continue
 											}
 											blockType, _ := blockMap["type"].(string)
 											switch blockType {
 											case "thinking":
 												modifiedThisMsg = true
 												thinkingText, _ := blockMap["thinking"].(string)
 												if thinkingText == "" {
 													continue
 												}
 												newContent = append(newContent, map[string]any{"type": "text", "text": thinkingText})
 												continue
 											case "redacted_thinking":
 												modifiedThisMsg = true
 												continue
 											case "tool_use":
 												modifiedThisMsg = true
 												name, _ := blockMap["name"].(string)
 												id, _ := blockMap["id"].(string)
 												input := blockMap["input"]
 												inputJSON, _ := json.Marshal(input)
 												text := "(tool_use)"
 												if name != "" {
 													text += " name=" + name
 												}
 												if id != "" {
 													text += " id=" + id
 												}
 												if len(inputJSON) > 0 && string(inputJSON) != "null" {
 													text += " input=" + string(inputJSON)
 												}
 												newContent = append(newContent, map[string]any{"type": "text", "text": text})
 												continue
 											case "tool_result":
 												modifiedThisMsg = true
 												toolUseID, _ := blockMap["tool_use_id"].(string)
 												isError, _ := blockMap["is_error"].(bool)
 												content := blockMap["content"]
 												contentJSON, _ := json.Marshal(content)
 												text := "(tool_result)"
 												if toolUseID != "" {
 													text += " tool_use_id=" + toolUseID
 												}
 												if isError {
 													text += " is_error=true"
 												}
 												if len(contentJSON) > 0 && string(contentJSON) != "null" {
 													text += "\n" + string(contentJSON)
 												}
 												newContent = append(newContent, map[string]any{"type": "text", "text": text})
 												continue
 											}
 											if blockType == "" {
 												if rawThinking, hasThinking := blockMap["thinking"]; hasThinking {
 													modifiedThisMsg = true
 													switch v := rawThinking.(type) {
 													case string:
 														if v != "" {
 															newContent = append(newContent, map[string]any{"type": "text", "text": v})
 														}
 													default:
 														if b, err := json.Marshal(v); err == nil && len(b) > 0 {
 															newContent = append(newContent, map[string]any{"type": "text", "text": string(b)})
 														}
 													}
 													continue
 												}
 											}
 											newContent = append(newContent, block)
 										}
 										if modifiedThisMsg {
 											modified = true
 											if len(newContent) == 0 {
 												placeholder := "(content removed)"
 												if role == "assistant" {
 													placeholder = "(assistant content removed)"
 												}
 												newContent = append(newContent, map[string]any{"type": "text", "text": placeholder})
 											}
 											msgMap["content"] = newContent
 										}
 										newMessages = append(newMessages, msgMap)
 									}
 									if !modified {
 										return body
-												fix(thinking): 优化 thinking block 签名错误重试逻辑

- FilterThinkingBlocksForRetry: 将 thinking block 转换为 text block 而非直接删除
- stripThinkingFromClaudeRequest: Antigravity 网关同步采用转换策略
- 统一处理 thinking/redacted_thinking/无 type 字段的 thinking block
- 保留 thinking 内容，避免上下文丢失

											
										
										
											2026-01-03 17:07:54 -08:00
+									}
-												fix(backend): 改进 thinking/tool block 签名处理和重试策略

主要改动：
- request_transformer: thinking block 缺少签名时降级为文本而非丢弃，保留内容并在上层禁用 thinking mode
- antigravity_gateway_service: 新增两阶段降级策略，先处理 thinking blocks，如仍失败且涉及 tool 签名错误则进一步降级 tool blocks
- gateway_request: 新增 FilterSignatureSensitiveBlocksForRetry 函数，支持将 tool_use/tool_result 降级为文本
- gateway_request: 改进 FilterThinkingBlocksForRetry，禁用顶层 thinking 配置以避免结构约束冲突
- gateway_service: 实现保守的两阶段重试逻辑，优先保留内容，仅在必要时降级工具调用
- 新增 antigravity_gateway_service_test.go 测试签名块剥离逻辑
- 更新相关测试用例以验证降级行为

此修复解决了跨平台/账户切换时历史消息签名失效导致的请求失败问题。

											
										
										
											2026-01-04 22:32:36 +08:00
+									req["messages"] = newMessages
-												fix(thinking): 优化 thinking block 签名错误重试逻辑

- FilterThinkingBlocksForRetry: 将 thinking block 转换为 text block 而非直接删除
- stripThinkingFromClaudeRequest: Antigravity 网关同步采用转换策略
- 统一处理 thinking/redacted_thinking/无 type 字段的 thinking block
- 保留 thinking 内容，避免上下文丢失

											
										
										
											2026-01-03 17:07:54 -08:00
+									newBody, err := json.Marshal(req)
 									if err != nil {
 										return body
 									}
 									return newBody
 								}
 								// filterThinkingBlocksInternal removes invalid thinking blocks from request
-												[UPDATE] 增强 Claude Thinking 模式支持与 Opus 4.6 动态预算适配

✨ feat(antigravity): 支持 thinking adaptive 类型并适配 Opus 4.6 动态预算
🧪 test(gateway): 增加 thinking 模式解析与签名块过滤的边界用例测试

											
										
										
											2026-02-11 10:31:16 +08:00
+								// 策略：
 								//   - 当 thinking.type 不是 "enabled"/"adaptive"：移除所有 thinking 相关块
 								//   - 当 thinking.type 是 "enabled"/"adaptive"：仅移除缺失/无效 signature 的 thinking 块
-												fix(thinking): 优化 thinking block 签名错误重试逻辑

- FilterThinkingBlocksForRetry: 将 thinking block 转换为 text block 而非直接删除
- stripThinkingFromClaudeRequest: Antigravity 网关同步采用转换策略
- 统一处理 thinking/redacted_thinking/无 type 字段的 thinking block
- 保留 thinking 内容，避免上下文丢失

											
										
										
											2026-01-03 17:07:54 -08:00
+								func filterThinkingBlocksInternal(body []byte, _ bool) []byte {
-												Fix/multiple issues (#24)

* fix(gemini): 修复 google_one OAuth 配置和 scopes 问题

- 修复 google_one 类型在 ExchangeCode 和 RefreshToken 中使用内置客户端
- 添加 DefaultGoogleOneScopes，包含 generative-language 和 drive.readonly 权限
- 在 EffectiveOAuthConfig 中为 google_one 类型使用专门的 scopes
- 将 docker-compose.override.yml 重命名为 .example 并添加到 .gitignore
- 完善 docker-compose.override.yml.example 示例文档

解决问题：
1. google_one OAuth 授权后 API 调用返回 403 权限不足
2. 缺少访问 Gemini API 所需的 generative-language scope
3. 缺少获取 Drive 存储配额所需的 drive.readonly scope

* fix(antigravity): 完全跳过 Claude 模型的所有 thinking 块

问题分析：
- 当前代码尝试保留有 signature 的 thinking 块
- 但 Vertex AI 的 signature 是完整性令牌，无法在本地验证
- 导致 400 错误：Invalid signature in thinking block

根本原因：
1. thinking 功能已对非 Gemini 模型禁用 (isThinkingEnabled=false)
2. Vertex AI 要求原样重放 (thinking, signature) 对或完全不发送
3. 本地无法复制 Vertex 的加密验证逻辑

修复方案：
- 对 Claude 模型完全跳过所有 thinking 块（无论是否有 signature）
- 保持 Gemini 模型使用 dummy signature 的行为不变
- 更新测试用例以反映新的预期行为

影响：
- 消除 thinking 相关的 400 错误
- 与现有的 thinking 禁用策略保持一致
- 不影响 Gemini 模型的 thinking 功能

测试：
- ✅ TestBuildParts_ThinkingBlockWithoutSignature 全部通过
- ✅ TestBuildTools_CustomTypeTools 全部通过

参考：Codex review 建议

* fix(gateway): 修复 count_tokens 端点 400 错误

问题分析：
- count_tokens 请求包含 thinking 块时返回 400 错误
- 原因：thinking 块未被过滤，直接转发到上游 API
- 上游 API 拒绝无效的 thinking signature

根本原因：
1. /v1/messages 请求通过 TransformClaudeToGemini 过滤 thinking 块
2. count_tokens 请求绕过转换，直接转发原始请求体
3. 导致包含无效 signature 的 thinking 块被发送到上游

修复方案：
- 创建 FilterThinkingBlocks 工具函数
- 在 buildCountTokensRequest 中应用过滤（1 行修改）
- 与 /v1/messages 行为保持一致

实现细节：
- FilterThinkingBlocks: 解析 JSON，过滤 thinking 块，重新序列化
- 失败安全：解析/序列化失败时返回原始请求体
- 性能优化：仅在发现 thinking 块时重新序列化

测试：
- ✅ 6 个单元测试全部通过
- ✅ 覆盖正常过滤、无 thinking 块、无效 JSON 等场景
- ✅ 现有测试不受影响

影响：
- 消除 count_tokens 的 400 错误
- 不影响 Antigravity 账号（仍返回模拟响应）
- 适用于所有账号类型（OAuth、API Key）

文件修改：
- backend/internal/service/gateway_request.go: +62 行（新函数）
- backend/internal/service/gateway_service.go: +2 行（应用过滤）
- backend/internal/service/gateway_request_test.go: +62 行（测试）

* fix(gateway): 增强 thinking 块过滤逻辑

基于 Codex 分析和建议的改进：

问题分析：
- 新错误：signature: Field required（signature 字段缺失）
- 旧错误：Invalid signature（signature 存在但无效）
- 两者都说明 thinking 块在请求中是危险的

Codex 建议：
- 保持 Option A：完全跳过所有 thinking 块
- 原因：thinking 块应该是只输出的，除非有服务端来源证明
- 在无状态代理中，无法安全区分上游来源 vs 客户端注入

改进内容：

1. 增强 FilterThinkingBlocks 函数
   - 过滤显式的 thinking 块：{"type":"thinking", ...}
   - 过滤无 type 的 thinking 对象：{"thinking": {...}}
   - 保留 tool_use 等其他类型块中的 thinking 字段
   - 修复：只在实际过滤时更新 content 数组

2. 扩展过滤范围
   - 将 FilterThinkingBlocks 应用到 /v1/messages 主路径
   - 之前只应用于 count_tokens，现在两个端点都过滤
   - 防止所有端点的 thinking 相关 400 错误

3. 改进测试
   - 新增：过滤无 type discriminator 的 thinking 块
   - 新增：不过滤 tool_use 中的 thinking 字段
   - 使用 containsThinkingBlock 辅助函数验证

测试：
- ✅ 8 个测试用例全部通过
- ✅ 覆盖各种 thinking 块格式
- ✅ 确保不误伤其他类型的块

影响：
- 消除 signature required 和 invalid signature 错误
- 统一 /v1/messages 和 count_tokens 的行为
- 更健壮的 thinking 块检测逻辑

参考：Codex review 和代码改进

* refactor: 根据 Codex 审查建议进行代码优化

基于 Codex 代码审查的 P1 和 P2 改进：

P1 改进（重要问题）：

1. 优化日志输出
   - 移除 thinking 块跳过时的 log.Printf
   - 避免高频请求下的日志噪音
   - 添加注释说明可通过指标监控

2. 清理遗留代码
   - 删除未使用的 isValidThoughtSignature 函数（27行）
   - 该函数在改为完全跳过 thinking 块后不再需要

P2 改进（性能优化）：

3. 添加快速路径检查
   - 在 FilterThinkingBlocks 中添加 bytes.Contains 预检查
   - 如果请求体不包含 "thinking" 字符串，直接返回
   - 避免不必要的 JSON 解析，提升性能

技术细节：
- request_transformer.go: -27行（删除函数），+1行（优化注释）
- gateway_request.go: +5行（快速路径 + bytes 导入）

测试：
- ✅ TestBuildParts_ThinkingBlockWithoutSignature 全部通过
- ✅ TestFilterThinkingBlocks 全部通过（8个测试用例）

影响：
- 减少日志噪音
- 提升性能（快速路径）
- 代码更简洁（删除未使用代码）

参考：Codex 代码审查建议

* fix: 修复 golangci-lint 检查问题

- 格式化 gateway_request_test.go
- 使用 switch 语句替代 if-else 链（staticcheck QF1003）

* fix(antigravity): 修复 thinking signature 处理并实现 Auto 模式降级

问题分析：
1. 原先代码错误地禁用了 Claude via Vertex 的 thinkingConfig
2. 历史 thinking 块的 signature 被完全跳过，导致验证失败
3. 跨模型混用时 dummy signature 会导致 400 错误

修复内容：

**request_transformer.go**：
- 删除第 38-43 行的错误逻辑（禁用 thinkingConfig）
- 引入 thoughtSignatureMode（Preserve/Dummy）策略
- Claude 模式：透传真实 signature，过滤空/dummy
- Gemini 模式：使用 dummy signature
- 支持 signature-only thinking 块
- tool_use 的 signature 也透传

**antigravity_gateway_service.go**：
- 新增 isSignatureRelatedError() 检测 signature 相关错误
- 新增 stripThinkingFromClaudeRequest() 移除 thinking 块
- 实现 Auto 模式：检测 400 + signature 关键词时自动降级重试
- 重试时完全移除 thinking 配置和消息中的 thinking 块
- 最多重试一次，避免循环

**测试**：
- 更新并新增测试覆盖 Claude preserve/Gemini dummy 模式
- 新增 tool_use signature 处理测试
- 所有测试通过（6/6）

影响：
- ✅ Claude via Vertex 可以正常使用 thinking 功能
- ✅ 历史 signature 正确透传，避免验证失败
- ✅ 跨模型混用时自动过滤无效 signature
- ✅ 错误驱动降级，自动修复 signature 问题
- ✅ 不影响纯 Claude API 和其他渠道

参考：Codex 深度分析和实现建议

* fix(lint): 修复 gofmt 格式问题

* fix(antigravity): 修复 stripThinkingFromClaudeRequest 遗漏 untyped thinking blocks

问题：
- Codex 审查指出 stripThinkingFromClaudeRequest 只移除了 type="thinking" 的块
- 没有处理没有 type 字段的 thinking 对象（如 {"thinking": "...", "signature": "..."}）
- 导致重试时仍包含无效 thinking 块，上游 400 错误持续

修复：
- 添加检查：跳过没有 type 但有 thinking 字段的块
- 现在会移除两种格式：
  1. {"type": "thinking", "thinking": "...", "signature": "..."}
  2. {"thinking": "...", "signature": "..."}（untyped）

测试：所有测试通过

参考：Codex P1 审查意见
											
										
										
											2026-01-02 17:47:49 +08:00
+									// Fast path: if body doesn't contain "thinking", skip parsing
-												perf(gateway): 优化负载感知调度

主要改进：
- 优化负载感知调度的准确性和响应速度
- 将 AccountUsageService 的包级缓存改为依赖注入
- 修复 SSE/JSON 转义和 nil 安全问题
- 恢复 Google One 功能兼容性

											
										
										
											2026-01-03 06:32:51 -08:00
+									if !bytes.Contains(body, []byte(`"type":"thinking"`)) &&
 										!bytes.Contains(body, []byte(`"type": "thinking"`)) &&
 										!bytes.Contains(body, []byte(`"type":"redacted_thinking"`)) &&
 										!bytes.Contains(body, []byte(`"type": "redacted_thinking"`)) &&
 										!bytes.Contains(body, []byte(`"thinking":`)) &&
 										!bytes.Contains(body, []byte(`"thinking" :`)) {
-												Fix/multiple issues (#24)

* fix(gemini): 修复 google_one OAuth 配置和 scopes 问题

- 修复 google_one 类型在 ExchangeCode 和 RefreshToken 中使用内置客户端
- 添加 DefaultGoogleOneScopes，包含 generative-language 和 drive.readonly 权限
- 在 EffectiveOAuthConfig 中为 google_one 类型使用专门的 scopes
- 将 docker-compose.override.yml 重命名为 .example 并添加到 .gitignore
- 完善 docker-compose.override.yml.example 示例文档

解决问题：
1. google_one OAuth 授权后 API 调用返回 403 权限不足
2. 缺少访问 Gemini API 所需的 generative-language scope
3. 缺少获取 Drive 存储配额所需的 drive.readonly scope

* fix(antigravity): 完全跳过 Claude 模型的所有 thinking 块

问题分析：
- 当前代码尝试保留有 signature 的 thinking 块
- 但 Vertex AI 的 signature 是完整性令牌，无法在本地验证
- 导致 400 错误：Invalid signature in thinking block

根本原因：
1. thinking 功能已对非 Gemini 模型禁用 (isThinkingEnabled=false)
2. Vertex AI 要求原样重放 (thinking, signature) 对或完全不发送
3. 本地无法复制 Vertex 的加密验证逻辑

修复方案：
- 对 Claude 模型完全跳过所有 thinking 块（无论是否有 signature）
- 保持 Gemini 模型使用 dummy signature 的行为不变
- 更新测试用例以反映新的预期行为

影响：
- 消除 thinking 相关的 400 错误
- 与现有的 thinking 禁用策略保持一致
- 不影响 Gemini 模型的 thinking 功能

测试：
- ✅ TestBuildParts_ThinkingBlockWithoutSignature 全部通过
- ✅ TestBuildTools_CustomTypeTools 全部通过

参考：Codex review 建议

* fix(gateway): 修复 count_tokens 端点 400 错误

问题分析：
- count_tokens 请求包含 thinking 块时返回 400 错误
- 原因：thinking 块未被过滤，直接转发到上游 API
- 上游 API 拒绝无效的 thinking signature

根本原因：
1. /v1/messages 请求通过 TransformClaudeToGemini 过滤 thinking 块
2. count_tokens 请求绕过转换，直接转发原始请求体
3. 导致包含无效 signature 的 thinking 块被发送到上游

修复方案：
- 创建 FilterThinkingBlocks 工具函数
- 在 buildCountTokensRequest 中应用过滤（1 行修改）
- 与 /v1/messages 行为保持一致

实现细节：
- FilterThinkingBlocks: 解析 JSON，过滤 thinking 块，重新序列化
- 失败安全：解析/序列化失败时返回原始请求体
- 性能优化：仅在发现 thinking 块时重新序列化

测试：
- ✅ 6 个单元测试全部通过
- ✅ 覆盖正常过滤、无 thinking 块、无效 JSON 等场景
- ✅ 现有测试不受影响

影响：
- 消除 count_tokens 的 400 错误
- 不影响 Antigravity 账号（仍返回模拟响应）
- 适用于所有账号类型（OAuth、API Key）

文件修改：
- backend/internal/service/gateway_request.go: +62 行（新函数）
- backend/internal/service/gateway_service.go: +2 行（应用过滤）
- backend/internal/service/gateway_request_test.go: +62 行（测试）

* fix(gateway): 增强 thinking 块过滤逻辑

基于 Codex 分析和建议的改进：

问题分析：
- 新错误：signature: Field required（signature 字段缺失）
- 旧错误：Invalid signature（signature 存在但无效）
- 两者都说明 thinking 块在请求中是危险的

Codex 建议：
- 保持 Option A：完全跳过所有 thinking 块
- 原因：thinking 块应该是只输出的，除非有服务端来源证明
- 在无状态代理中，无法安全区分上游来源 vs 客户端注入

改进内容：

1. 增强 FilterThinkingBlocks 函数
   - 过滤显式的 thinking 块：{"type":"thinking", ...}
   - 过滤无 type 的 thinking 对象：{"thinking": {...}}
   - 保留 tool_use 等其他类型块中的 thinking 字段
   - 修复：只在实际过滤时更新 content 数组

2. 扩展过滤范围
   - 将 FilterThinkingBlocks 应用到 /v1/messages 主路径
   - 之前只应用于 count_tokens，现在两个端点都过滤
   - 防止所有端点的 thinking 相关 400 错误

3. 改进测试
   - 新增：过滤无 type discriminator 的 thinking 块
   - 新增：不过滤 tool_use 中的 thinking 字段
   - 使用 containsThinkingBlock 辅助函数验证

测试：
- ✅ 8 个测试用例全部通过
- ✅ 覆盖各种 thinking 块格式
- ✅ 确保不误伤其他类型的块

影响：
- 消除 signature required 和 invalid signature 错误
- 统一 /v1/messages 和 count_tokens 的行为
- 更健壮的 thinking 块检测逻辑

参考：Codex review 和代码改进

* refactor: 根据 Codex 审查建议进行代码优化

基于 Codex 代码审查的 P1 和 P2 改进：

P1 改进（重要问题）：

1. 优化日志输出
   - 移除 thinking 块跳过时的 log.Printf
   - 避免高频请求下的日志噪音
   - 添加注释说明可通过指标监控

2. 清理遗留代码
   - 删除未使用的 isValidThoughtSignature 函数（27行）
   - 该函数在改为完全跳过 thinking 块后不再需要

P2 改进（性能优化）：

3. 添加快速路径检查
   - 在 FilterThinkingBlocks 中添加 bytes.Contains 预检查
   - 如果请求体不包含 "thinking" 字符串，直接返回
   - 避免不必要的 JSON 解析，提升性能

技术细节：
- request_transformer.go: -27行（删除函数），+1行（优化注释）
- gateway_request.go: +5行（快速路径 + bytes 导入）

测试：
- ✅ TestBuildParts_ThinkingBlockWithoutSignature 全部通过
- ✅ TestFilterThinkingBlocks 全部通过（8个测试用例）

影响：
- 减少日志噪音
- 提升性能（快速路径）
- 代码更简洁（删除未使用代码）

参考：Codex 代码审查建议

* fix: 修复 golangci-lint 检查问题

- 格式化 gateway_request_test.go
- 使用 switch 语句替代 if-else 链（staticcheck QF1003）

* fix(antigravity): 修复 thinking signature 处理并实现 Auto 模式降级

问题分析：
1. 原先代码错误地禁用了 Claude via Vertex 的 thinkingConfig
2. 历史 thinking 块的 signature 被完全跳过，导致验证失败
3. 跨模型混用时 dummy signature 会导致 400 错误

修复内容：

**request_transformer.go**：
- 删除第 38-43 行的错误逻辑（禁用 thinkingConfig）
- 引入 thoughtSignatureMode（Preserve/Dummy）策略
- Claude 模式：透传真实 signature，过滤空/dummy
- Gemini 模式：使用 dummy signature
- 支持 signature-only thinking 块
- tool_use 的 signature 也透传

**antigravity_gateway_service.go**：
- 新增 isSignatureRelatedError() 检测 signature 相关错误
- 新增 stripThinkingFromClaudeRequest() 移除 thinking 块
- 实现 Auto 模式：检测 400 + signature 关键词时自动降级重试
- 重试时完全移除 thinking 配置和消息中的 thinking 块
- 最多重试一次，避免循环

**测试**：
- 更新并新增测试覆盖 Claude preserve/Gemini dummy 模式
- 新增 tool_use signature 处理测试
- 所有测试通过（6/6）

影响：
- ✅ Claude via Vertex 可以正常使用 thinking 功能
- ✅ 历史 signature 正确透传，避免验证失败
- ✅ 跨模型混用时自动过滤无效 signature
- ✅ 错误驱动降级，自动修复 signature 问题
- ✅ 不影响纯 Claude API 和其他渠道

参考：Codex 深度分析和实现建议

* fix(lint): 修复 gofmt 格式问题

* fix(antigravity): 修复 stripThinkingFromClaudeRequest 遗漏 untyped thinking blocks

问题：
- Codex 审查指出 stripThinkingFromClaudeRequest 只移除了 type="thinking" 的块
- 没有处理没有 type 字段的 thinking 对象（如 {"thinking": "...", "signature": "..."}）
- 导致重试时仍包含无效 thinking 块，上游 400 错误持续

修复：
- 添加检查：跳过没有 type 但有 thinking 字段的块
- 现在会移除两种格式：
  1. {"type": "thinking", "thinking": "...", "signature": "..."}
  2. {"thinking": "...", "signature": "..."}（untyped）

测试：所有测试通过

参考：Codex P1 审查意见
											
										
										
											2026-01-02 17:47:49 +08:00
+										return body
 									}
 									var req map[string]any
 									if err := json.Unmarshal(body, &req); err != nil {
-												fix(thinking): 优化 thinking block 签名错误重试逻辑

- FilterThinkingBlocksForRetry: 将 thinking block 转换为 text block 而非直接删除
- stripThinkingFromClaudeRequest: Antigravity 网关同步采用转换策略
- 统一处理 thinking/redacted_thinking/无 type 字段的 thinking block
- 保留 thinking 内容，避免上下文丢失

											
										
										
											2026-01-03 17:07:54 -08:00
+										return body
-												Fix/multiple issues (#24)

* fix(gemini): 修复 google_one OAuth 配置和 scopes 问题

- 修复 google_one 类型在 ExchangeCode 和 RefreshToken 中使用内置客户端
- 添加 DefaultGoogleOneScopes，包含 generative-language 和 drive.readonly 权限
- 在 EffectiveOAuthConfig 中为 google_one 类型使用专门的 scopes
- 将 docker-compose.override.yml 重命名为 .example 并添加到 .gitignore
- 完善 docker-compose.override.yml.example 示例文档

解决问题：
1. google_one OAuth 授权后 API 调用返回 403 权限不足
2. 缺少访问 Gemini API 所需的 generative-language scope
3. 缺少获取 Drive 存储配额所需的 drive.readonly scope

* fix(antigravity): 完全跳过 Claude 模型的所有 thinking 块

问题分析：
- 当前代码尝试保留有 signature 的 thinking 块
- 但 Vertex AI 的 signature 是完整性令牌，无法在本地验证
- 导致 400 错误：Invalid signature in thinking block

根本原因：
1. thinking 功能已对非 Gemini 模型禁用 (isThinkingEnabled=false)
2. Vertex AI 要求原样重放 (thinking, signature) 对或完全不发送
3. 本地无法复制 Vertex 的加密验证逻辑

修复方案：
- 对 Claude 模型完全跳过所有 thinking 块（无论是否有 signature）
- 保持 Gemini 模型使用 dummy signature 的行为不变
- 更新测试用例以反映新的预期行为

影响：
- 消除 thinking 相关的 400 错误
- 与现有的 thinking 禁用策略保持一致
- 不影响 Gemini 模型的 thinking 功能

测试：
- ✅ TestBuildParts_ThinkingBlockWithoutSignature 全部通过
- ✅ TestBuildTools_CustomTypeTools 全部通过

参考：Codex review 建议

* fix(gateway): 修复 count_tokens 端点 400 错误

问题分析：
- count_tokens 请求包含 thinking 块时返回 400 错误
- 原因：thinking 块未被过滤，直接转发到上游 API
- 上游 API 拒绝无效的 thinking signature

根本原因：
1. /v1/messages 请求通过 TransformClaudeToGemini 过滤 thinking 块
2. count_tokens 请求绕过转换，直接转发原始请求体
3. 导致包含无效 signature 的 thinking 块被发送到上游

修复方案：
- 创建 FilterThinkingBlocks 工具函数
- 在 buildCountTokensRequest 中应用过滤（1 行修改）
- 与 /v1/messages 行为保持一致

实现细节：
- FilterThinkingBlocks: 解析 JSON，过滤 thinking 块，重新序列化
- 失败安全：解析/序列化失败时返回原始请求体
- 性能优化：仅在发现 thinking 块时重新序列化

测试：
- ✅ 6 个单元测试全部通过
- ✅ 覆盖正常过滤、无 thinking 块、无效 JSON 等场景
- ✅ 现有测试不受影响

影响：
- 消除 count_tokens 的 400 错误
- 不影响 Antigravity 账号（仍返回模拟响应）
- 适用于所有账号类型（OAuth、API Key）

文件修改：
- backend/internal/service/gateway_request.go: +62 行（新函数）
- backend/internal/service/gateway_service.go: +2 行（应用过滤）
- backend/internal/service/gateway_request_test.go: +62 行（测试）

* fix(gateway): 增强 thinking 块过滤逻辑

基于 Codex 分析和建议的改进：

问题分析：
- 新错误：signature: Field required（signature 字段缺失）
- 旧错误：Invalid signature（signature 存在但无效）
- 两者都说明 thinking 块在请求中是危险的

Codex 建议：
- 保持 Option A：完全跳过所有 thinking 块
- 原因：thinking 块应该是只输出的，除非有服务端来源证明
- 在无状态代理中，无法安全区分上游来源 vs 客户端注入

改进内容：

1. 增强 FilterThinkingBlocks 函数
   - 过滤显式的 thinking 块：{"type":"thinking", ...}
   - 过滤无 type 的 thinking 对象：{"thinking": {...}}
   - 保留 tool_use 等其他类型块中的 thinking 字段
   - 修复：只在实际过滤时更新 content 数组

2. 扩展过滤范围
   - 将 FilterThinkingBlocks 应用到 /v1/messages 主路径
   - 之前只应用于 count_tokens，现在两个端点都过滤
   - 防止所有端点的 thinking 相关 400 错误

3. 改进测试
   - 新增：过滤无 type discriminator 的 thinking 块
   - 新增：不过滤 tool_use 中的 thinking 字段
   - 使用 containsThinkingBlock 辅助函数验证

测试：
- ✅ 8 个测试用例全部通过
- ✅ 覆盖各种 thinking 块格式
- ✅ 确保不误伤其他类型的块

影响：
- 消除 signature required 和 invalid signature 错误
- 统一 /v1/messages 和 count_tokens 的行为
- 更健壮的 thinking 块检测逻辑

参考：Codex review 和代码改进

* refactor: 根据 Codex 审查建议进行代码优化

基于 Codex 代码审查的 P1 和 P2 改进：

P1 改进（重要问题）：

1. 优化日志输出
   - 移除 thinking 块跳过时的 log.Printf
   - 避免高频请求下的日志噪音
   - 添加注释说明可通过指标监控

2. 清理遗留代码
   - 删除未使用的 isValidThoughtSignature 函数（27行）
   - 该函数在改为完全跳过 thinking 块后不再需要

P2 改进（性能优化）：

3. 添加快速路径检查
   - 在 FilterThinkingBlocks 中添加 bytes.Contains 预检查
   - 如果请求体不包含 "thinking" 字符串，直接返回
   - 避免不必要的 JSON 解析，提升性能

技术细节：
- request_transformer.go: -27行（删除函数），+1行（优化注释）
- gateway_request.go: +5行（快速路径 + bytes 导入）

测试：
- ✅ TestBuildParts_ThinkingBlockWithoutSignature 全部通过
- ✅ TestFilterThinkingBlocks 全部通过（8个测试用例）

影响：
- 减少日志噪音
- 提升性能（快速路径）
- 代码更简洁（删除未使用代码）

参考：Codex 代码审查建议

* fix: 修复 golangci-lint 检查问题

- 格式化 gateway_request_test.go
- 使用 switch 语句替代 if-else 链（staticcheck QF1003）

* fix(antigravity): 修复 thinking signature 处理并实现 Auto 模式降级

问题分析：
1. 原先代码错误地禁用了 Claude via Vertex 的 thinkingConfig
2. 历史 thinking 块的 signature 被完全跳过，导致验证失败
3. 跨模型混用时 dummy signature 会导致 400 错误

修复内容：

**request_transformer.go**：
- 删除第 38-43 行的错误逻辑（禁用 thinkingConfig）
- 引入 thoughtSignatureMode（Preserve/Dummy）策略
- Claude 模式：透传真实 signature，过滤空/dummy
- Gemini 模式：使用 dummy signature
- 支持 signature-only thinking 块
- tool_use 的 signature 也透传

**antigravity_gateway_service.go**：
- 新增 isSignatureRelatedError() 检测 signature 相关错误
- 新增 stripThinkingFromClaudeRequest() 移除 thinking 块
- 实现 Auto 模式：检测 400 + signature 关键词时自动降级重试
- 重试时完全移除 thinking 配置和消息中的 thinking 块
- 最多重试一次，避免循环

**测试**：
- 更新并新增测试覆盖 Claude preserve/Gemini dummy 模式
- 新增 tool_use signature 处理测试
- 所有测试通过（6/6）

影响：
- ✅ Claude via Vertex 可以正常使用 thinking 功能
- ✅ 历史 signature 正确透传，避免验证失败
- ✅ 跨模型混用时自动过滤无效 signature
- ✅ 错误驱动降级，自动修复 signature 问题
- ✅ 不影响纯 Claude API 和其他渠道

参考：Codex 深度分析和实现建议

* fix(lint): 修复 gofmt 格式问题

* fix(antigravity): 修复 stripThinkingFromClaudeRequest 遗漏 untyped thinking blocks

问题：
- Codex 审查指出 stripThinkingFromClaudeRequest 只移除了 type="thinking" 的块
- 没有处理没有 type 字段的 thinking 对象（如 {"thinking": "...", "signature": "..."}）
- 导致重试时仍包含无效 thinking 块，上游 400 错误持续

修复：
- 添加检查：跳过没有 type 但有 thinking 字段的块
- 现在会移除两种格式：
  1. {"type": "thinking", "thinking": "...", "signature": "..."}
  2. {"thinking": "...", "signature": "..."}（untyped）

测试：所有测试通过

参考：Codex P1 审查意见
											
										
										
											2026-01-02 17:47:49 +08:00
+									}
-												perf(gateway): 优化负载感知调度

主要改进：
- 优化负载感知调度的准确性和响应速度
- 将 AccountUsageService 的包级缓存改为依赖注入
- 修复 SSE/JSON 转义和 nil 安全问题
- 恢复 Google One 功能兼容性

											
										
										
											2026-01-03 06:32:51 -08:00
+									// Check if thinking is enabled
 									thinkingEnabled := false
 									if thinking, ok := req["thinking"].(map[string]any); ok {
-												[UPDATE] 增强 Claude Thinking 模式支持与 Opus 4.6 动态预算适配

✨ feat(antigravity): 支持 thinking adaptive 类型并适配 Opus 4.6 动态预算
🧪 test(gateway): 增加 thinking 模式解析与签名块过滤的边界用例测试

											
										
										
											2026-02-11 10:31:16 +08:00
+										if thinkType, ok := thinking["type"].(string); ok && (thinkType == "enabled" || thinkType == "adaptive") {
-												perf(gateway): 优化负载感知调度

主要改进：
- 优化负载感知调度的准确性和响应速度
- 将 AccountUsageService 的包级缓存改为依赖注入
- 修复 SSE/JSON 转义和 nil 安全问题
- 恢复 Google One 功能兼容性

											
										
										
											2026-01-03 06:32:51 -08:00
+											thinkingEnabled = true
 										}
 									}
-												Fix/multiple issues (#24)

* fix(gemini): 修复 google_one OAuth 配置和 scopes 问题

- 修复 google_one 类型在 ExchangeCode 和 RefreshToken 中使用内置客户端
- 添加 DefaultGoogleOneScopes，包含 generative-language 和 drive.readonly 权限
- 在 EffectiveOAuthConfig 中为 google_one 类型使用专门的 scopes
- 将 docker-compose.override.yml 重命名为 .example 并添加到 .gitignore
- 完善 docker-compose.override.yml.example 示例文档

解决问题：
1. google_one OAuth 授权后 API 调用返回 403 权限不足
2. 缺少访问 Gemini API 所需的 generative-language scope
3. 缺少获取 Drive 存储配额所需的 drive.readonly scope

* fix(antigravity): 完全跳过 Claude 模型的所有 thinking 块

问题分析：
- 当前代码尝试保留有 signature 的 thinking 块
- 但 Vertex AI 的 signature 是完整性令牌，无法在本地验证
- 导致 400 错误：Invalid signature in thinking block

根本原因：
1. thinking 功能已对非 Gemini 模型禁用 (isThinkingEnabled=false)
2. Vertex AI 要求原样重放 (thinking, signature) 对或完全不发送
3. 本地无法复制 Vertex 的加密验证逻辑

修复方案：
- 对 Claude 模型完全跳过所有 thinking 块（无论是否有 signature）
- 保持 Gemini 模型使用 dummy signature 的行为不变
- 更新测试用例以反映新的预期行为

影响：
- 消除 thinking 相关的 400 错误
- 与现有的 thinking 禁用策略保持一致
- 不影响 Gemini 模型的 thinking 功能

测试：
- ✅ TestBuildParts_ThinkingBlockWithoutSignature 全部通过
- ✅ TestBuildTools_CustomTypeTools 全部通过

参考：Codex review 建议

* fix(gateway): 修复 count_tokens 端点 400 错误

问题分析：
- count_tokens 请求包含 thinking 块时返回 400 错误
- 原因：thinking 块未被过滤，直接转发到上游 API
- 上游 API 拒绝无效的 thinking signature

根本原因：
1. /v1/messages 请求通过 TransformClaudeToGemini 过滤 thinking 块
2. count_tokens 请求绕过转换，直接转发原始请求体
3. 导致包含无效 signature 的 thinking 块被发送到上游

修复方案：
- 创建 FilterThinkingBlocks 工具函数
- 在 buildCountTokensRequest 中应用过滤（1 行修改）
- 与 /v1/messages 行为保持一致

实现细节：
- FilterThinkingBlocks: 解析 JSON，过滤 thinking 块，重新序列化
- 失败安全：解析/序列化失败时返回原始请求体
- 性能优化：仅在发现 thinking 块时重新序列化

测试：
- ✅ 6 个单元测试全部通过
- ✅ 覆盖正常过滤、无 thinking 块、无效 JSON 等场景
- ✅ 现有测试不受影响

影响：
- 消除 count_tokens 的 400 错误
- 不影响 Antigravity 账号（仍返回模拟响应）
- 适用于所有账号类型（OAuth、API Key）

文件修改：
- backend/internal/service/gateway_request.go: +62 行（新函数）
- backend/internal/service/gateway_service.go: +2 行（应用过滤）
- backend/internal/service/gateway_request_test.go: +62 行（测试）

* fix(gateway): 增强 thinking 块过滤逻辑

基于 Codex 分析和建议的改进：

问题分析：
- 新错误：signature: Field required（signature 字段缺失）
- 旧错误：Invalid signature（signature 存在但无效）
- 两者都说明 thinking 块在请求中是危险的

Codex 建议：
- 保持 Option A：完全跳过所有 thinking 块
- 原因：thinking 块应该是只输出的，除非有服务端来源证明
- 在无状态代理中，无法安全区分上游来源 vs 客户端注入

改进内容：

1. 增强 FilterThinkingBlocks 函数
   - 过滤显式的 thinking 块：{"type":"thinking", ...}
   - 过滤无 type 的 thinking 对象：{"thinking": {...}}
   - 保留 tool_use 等其他类型块中的 thinking 字段
   - 修复：只在实际过滤时更新 content 数组

2. 扩展过滤范围
   - 将 FilterThinkingBlocks 应用到 /v1/messages 主路径
   - 之前只应用于 count_tokens，现在两个端点都过滤
   - 防止所有端点的 thinking 相关 400 错误

3. 改进测试
   - 新增：过滤无 type discriminator 的 thinking 块
   - 新增：不过滤 tool_use 中的 thinking 字段
   - 使用 containsThinkingBlock 辅助函数验证

测试：
- ✅ 8 个测试用例全部通过
- ✅ 覆盖各种 thinking 块格式
- ✅ 确保不误伤其他类型的块

影响：
- 消除 signature required 和 invalid signature 错误
- 统一 /v1/messages 和 count_tokens 的行为
- 更健壮的 thinking 块检测逻辑

参考：Codex review 和代码改进

* refactor: 根据 Codex 审查建议进行代码优化

基于 Codex 代码审查的 P1 和 P2 改进：

P1 改进（重要问题）：

1. 优化日志输出
   - 移除 thinking 块跳过时的 log.Printf
   - 避免高频请求下的日志噪音
   - 添加注释说明可通过指标监控

2. 清理遗留代码
   - 删除未使用的 isValidThoughtSignature 函数（27行）
   - 该函数在改为完全跳过 thinking 块后不再需要

P2 改进（性能优化）：

3. 添加快速路径检查
   - 在 FilterThinkingBlocks 中添加 bytes.Contains 预检查
   - 如果请求体不包含 "thinking" 字符串，直接返回
   - 避免不必要的 JSON 解析，提升性能

技术细节：
- request_transformer.go: -27行（删除函数），+1行（优化注释）
- gateway_request.go: +5行（快速路径 + bytes 导入）

测试：
- ✅ TestBuildParts_ThinkingBlockWithoutSignature 全部通过
- ✅ TestFilterThinkingBlocks 全部通过（8个测试用例）

影响：
- 减少日志噪音
- 提升性能（快速路径）
- 代码更简洁（删除未使用代码）

参考：Codex 代码审查建议

* fix: 修复 golangci-lint 检查问题

- 格式化 gateway_request_test.go
- 使用 switch 语句替代 if-else 链（staticcheck QF1003）

* fix(antigravity): 修复 thinking signature 处理并实现 Auto 模式降级

问题分析：
1. 原先代码错误地禁用了 Claude via Vertex 的 thinkingConfig
2. 历史 thinking 块的 signature 被完全跳过，导致验证失败
3. 跨模型混用时 dummy signature 会导致 400 错误

修复内容：

**request_transformer.go**：
- 删除第 38-43 行的错误逻辑（禁用 thinkingConfig）
- 引入 thoughtSignatureMode（Preserve/Dummy）策略
- Claude 模式：透传真实 signature，过滤空/dummy
- Gemini 模式：使用 dummy signature
- 支持 signature-only thinking 块
- tool_use 的 signature 也透传

**antigravity_gateway_service.go**：
- 新增 isSignatureRelatedError() 检测 signature 相关错误
- 新增 stripThinkingFromClaudeRequest() 移除 thinking 块
- 实现 Auto 模式：检测 400 + signature 关键词时自动降级重试
- 重试时完全移除 thinking 配置和消息中的 thinking 块
- 最多重试一次，避免循环

**测试**：
- 更新并新增测试覆盖 Claude preserve/Gemini dummy 模式
- 新增 tool_use signature 处理测试
- 所有测试通过（6/6）

影响：
- ✅ Claude via Vertex 可以正常使用 thinking 功能
- ✅ 历史 signature 正确透传，避免验证失败
- ✅ 跨模型混用时自动过滤无效 signature
- ✅ 错误驱动降级，自动修复 signature 问题
- ✅ 不影响纯 Claude API 和其他渠道

参考：Codex 深度分析和实现建议

* fix(lint): 修复 gofmt 格式问题

* fix(antigravity): 修复 stripThinkingFromClaudeRequest 遗漏 untyped thinking blocks

问题：
- Codex 审查指出 stripThinkingFromClaudeRequest 只移除了 type="thinking" 的块
- 没有处理没有 type 字段的 thinking 对象（如 {"thinking": "...", "signature": "..."}）
- 导致重试时仍包含无效 thinking 块，上游 400 错误持续

修复：
- 添加检查：跳过没有 type 但有 thinking 字段的块
- 现在会移除两种格式：
  1. {"type": "thinking", "thinking": "...", "signature": "..."}
  2. {"thinking": "...", "signature": "..."}（untyped）

测试：所有测试通过

参考：Codex P1 审查意见
											
										
										
											2026-01-02 17:47:49 +08:00
+									messages, ok := req["messages"].([]any)
 									if !ok {
-												fix(thinking): 优化 thinking block 签名错误重试逻辑

- FilterThinkingBlocksForRetry: 将 thinking block 转换为 text block 而非直接删除
- stripThinkingFromClaudeRequest: Antigravity 网关同步采用转换策略
- 统一处理 thinking/redacted_thinking/无 type 字段的 thinking block
- 保留 thinking 内容，避免上下文丢失

											
										
										
											2026-01-03 17:07:54 -08:00
+										return body
-												Fix/multiple issues (#24)

* fix(gemini): 修复 google_one OAuth 配置和 scopes 问题

- 修复 google_one 类型在 ExchangeCode 和 RefreshToken 中使用内置客户端
- 添加 DefaultGoogleOneScopes，包含 generative-language 和 drive.readonly 权限
- 在 EffectiveOAuthConfig 中为 google_one 类型使用专门的 scopes
- 将 docker-compose.override.yml 重命名为 .example 并添加到 .gitignore
- 完善 docker-compose.override.yml.example 示例文档

解决问题：
1. google_one OAuth 授权后 API 调用返回 403 权限不足
2. 缺少访问 Gemini API 所需的 generative-language scope
3. 缺少获取 Drive 存储配额所需的 drive.readonly scope

* fix(antigravity): 完全跳过 Claude 模型的所有 thinking 块

问题分析：
- 当前代码尝试保留有 signature 的 thinking 块
- 但 Vertex AI 的 signature 是完整性令牌，无法在本地验证
- 导致 400 错误：Invalid signature in thinking block

根本原因：
1. thinking 功能已对非 Gemini 模型禁用 (isThinkingEnabled=false)
2. Vertex AI 要求原样重放 (thinking, signature) 对或完全不发送
3. 本地无法复制 Vertex 的加密验证逻辑

修复方案：
- 对 Claude 模型完全跳过所有 thinking 块（无论是否有 signature）
- 保持 Gemini 模型使用 dummy signature 的行为不变
- 更新测试用例以反映新的预期行为

影响：
- 消除 thinking 相关的 400 错误
- 与现有的 thinking 禁用策略保持一致
- 不影响 Gemini 模型的 thinking 功能

测试：
- ✅ TestBuildParts_ThinkingBlockWithoutSignature 全部通过
- ✅ TestBuildTools_CustomTypeTools 全部通过

参考：Codex review 建议

* fix(gateway): 修复 count_tokens 端点 400 错误

问题分析：
- count_tokens 请求包含 thinking 块时返回 400 错误
- 原因：thinking 块未被过滤，直接转发到上游 API
- 上游 API 拒绝无效的 thinking signature

根本原因：
1. /v1/messages 请求通过 TransformClaudeToGemini 过滤 thinking 块
2. count_tokens 请求绕过转换，直接转发原始请求体
3. 导致包含无效 signature 的 thinking 块被发送到上游

修复方案：
- 创建 FilterThinkingBlocks 工具函数
- 在 buildCountTokensRequest 中应用过滤（1 行修改）
- 与 /v1/messages 行为保持一致

实现细节：
- FilterThinkingBlocks: 解析 JSON，过滤 thinking 块，重新序列化
- 失败安全：解析/序列化失败时返回原始请求体
- 性能优化：仅在发现 thinking 块时重新序列化

测试：
- ✅ 6 个单元测试全部通过
- ✅ 覆盖正常过滤、无 thinking 块、无效 JSON 等场景
- ✅ 现有测试不受影响

影响：
- 消除 count_tokens 的 400 错误
- 不影响 Antigravity 账号（仍返回模拟响应）
- 适用于所有账号类型（OAuth、API Key）

文件修改：
- backend/internal/service/gateway_request.go: +62 行（新函数）
- backend/internal/service/gateway_service.go: +2 行（应用过滤）
- backend/internal/service/gateway_request_test.go: +62 行（测试）

* fix(gateway): 增强 thinking 块过滤逻辑

基于 Codex 分析和建议的改进：

问题分析：
- 新错误：signature: Field required（signature 字段缺失）
- 旧错误：Invalid signature（signature 存在但无效）
- 两者都说明 thinking 块在请求中是危险的

Codex 建议：
- 保持 Option A：完全跳过所有 thinking 块
- 原因：thinking 块应该是只输出的，除非有服务端来源证明
- 在无状态代理中，无法安全区分上游来源 vs 客户端注入

改进内容：

1. 增强 FilterThinkingBlocks 函数
   - 过滤显式的 thinking 块：{"type":"thinking", ...}
   - 过滤无 type 的 thinking 对象：{"thinking": {...}}
   - 保留 tool_use 等其他类型块中的 thinking 字段
   - 修复：只在实际过滤时更新 content 数组

2. 扩展过滤范围
   - 将 FilterThinkingBlocks 应用到 /v1/messages 主路径
   - 之前只应用于 count_tokens，现在两个端点都过滤
   - 防止所有端点的 thinking 相关 400 错误

3. 改进测试
   - 新增：过滤无 type discriminator 的 thinking 块
   - 新增：不过滤 tool_use 中的 thinking 字段
   - 使用 containsThinkingBlock 辅助函数验证

测试：
- ✅ 8 个测试用例全部通过
- ✅ 覆盖各种 thinking 块格式
- ✅ 确保不误伤其他类型的块

影响：
- 消除 signature required 和 invalid signature 错误
- 统一 /v1/messages 和 count_tokens 的行为
- 更健壮的 thinking 块检测逻辑

参考：Codex review 和代码改进

* refactor: 根据 Codex 审查建议进行代码优化

基于 Codex 代码审查的 P1 和 P2 改进：

P1 改进（重要问题）：

1. 优化日志输出
   - 移除 thinking 块跳过时的 log.Printf
   - 避免高频请求下的日志噪音
   - 添加注释说明可通过指标监控

2. 清理遗留代码
   - 删除未使用的 isValidThoughtSignature 函数（27行）
   - 该函数在改为完全跳过 thinking 块后不再需要

P2 改进（性能优化）：

3. 添加快速路径检查
   - 在 FilterThinkingBlocks 中添加 bytes.Contains 预检查
   - 如果请求体不包含 "thinking" 字符串，直接返回
   - 避免不必要的 JSON 解析，提升性能

技术细节：
- request_transformer.go: -27行（删除函数），+1行（优化注释）
- gateway_request.go: +5行（快速路径 + bytes 导入）

测试：
- ✅ TestBuildParts_ThinkingBlockWithoutSignature 全部通过
- ✅ TestFilterThinkingBlocks 全部通过（8个测试用例）

影响：
- 减少日志噪音
- 提升性能（快速路径）
- 代码更简洁（删除未使用代码）

参考：Codex 代码审查建议

* fix: 修复 golangci-lint 检查问题

- 格式化 gateway_request_test.go
- 使用 switch 语句替代 if-else 链（staticcheck QF1003）

* fix(antigravity): 修复 thinking signature 处理并实现 Auto 模式降级

问题分析：
1. 原先代码错误地禁用了 Claude via Vertex 的 thinkingConfig
2. 历史 thinking 块的 signature 被完全跳过，导致验证失败
3. 跨模型混用时 dummy signature 会导致 400 错误

修复内容：

**request_transformer.go**：
- 删除第 38-43 行的错误逻辑（禁用 thinkingConfig）
- 引入 thoughtSignatureMode（Preserve/Dummy）策略
- Claude 模式：透传真实 signature，过滤空/dummy
- Gemini 模式：使用 dummy signature
- 支持 signature-only thinking 块
- tool_use 的 signature 也透传

**antigravity_gateway_service.go**：
- 新增 isSignatureRelatedError() 检测 signature 相关错误
- 新增 stripThinkingFromClaudeRequest() 移除 thinking 块
- 实现 Auto 模式：检测 400 + signature 关键词时自动降级重试
- 重试时完全移除 thinking 配置和消息中的 thinking 块
- 最多重试一次，避免循环

**测试**：
- 更新并新增测试覆盖 Claude preserve/Gemini dummy 模式
- 新增 tool_use signature 处理测试
- 所有测试通过（6/6）

影响：
- ✅ Claude via Vertex 可以正常使用 thinking 功能
- ✅ 历史 signature 正确透传，避免验证失败
- ✅ 跨模型混用时自动过滤无效 signature
- ✅ 错误驱动降级，自动修复 signature 问题
- ✅ 不影响纯 Claude API 和其他渠道

参考：Codex 深度分析和实现建议

* fix(lint): 修复 gofmt 格式问题

* fix(antigravity): 修复 stripThinkingFromClaudeRequest 遗漏 untyped thinking blocks

问题：
- Codex 审查指出 stripThinkingFromClaudeRequest 只移除了 type="thinking" 的块
- 没有处理没有 type 字段的 thinking 对象（如 {"thinking": "...", "signature": "..."}）
- 导致重试时仍包含无效 thinking 块，上游 400 错误持续

修复：
- 添加检查：跳过没有 type 但有 thinking 字段的块
- 现在会移除两种格式：
  1. {"type": "thinking", "thinking": "...", "signature": "..."}
  2. {"thinking": "...", "signature": "..."}（untyped）

测试：所有测试通过

参考：Codex P1 审查意见
											
										
										
											2026-01-02 17:47:49 +08:00
+									}
 									filtered := false
 									for _, msg := range messages {
 										msgMap, ok := msg.(map[string]any)
 										if !ok {
 											continue
 										}
-												perf(gateway): 优化负载感知调度

主要改进：
- 优化负载感知调度的准确性和响应速度
- 将 AccountUsageService 的包级缓存改为依赖注入
- 修复 SSE/JSON 转义和 nil 安全问题
- 恢复 Google One 功能兼容性

											
										
										
											2026-01-03 06:32:51 -08:00
+										role, _ := msgMap["role"].(string)
-												Fix/multiple issues (#24)

* fix(gemini): 修复 google_one OAuth 配置和 scopes 问题

- 修复 google_one 类型在 ExchangeCode 和 RefreshToken 中使用内置客户端
- 添加 DefaultGoogleOneScopes，包含 generative-language 和 drive.readonly 权限
- 在 EffectiveOAuthConfig 中为 google_one 类型使用专门的 scopes
- 将 docker-compose.override.yml 重命名为 .example 并添加到 .gitignore
- 完善 docker-compose.override.yml.example 示例文档

解决问题：
1. google_one OAuth 授权后 API 调用返回 403 权限不足
2. 缺少访问 Gemini API 所需的 generative-language scope
3. 缺少获取 Drive 存储配额所需的 drive.readonly scope

* fix(antigravity): 完全跳过 Claude 模型的所有 thinking 块

问题分析：
- 当前代码尝试保留有 signature 的 thinking 块
- 但 Vertex AI 的 signature 是完整性令牌，无法在本地验证
- 导致 400 错误：Invalid signature in thinking block

根本原因：
1. thinking 功能已对非 Gemini 模型禁用 (isThinkingEnabled=false)
2. Vertex AI 要求原样重放 (thinking, signature) 对或完全不发送
3. 本地无法复制 Vertex 的加密验证逻辑

修复方案：
- 对 Claude 模型完全跳过所有 thinking 块（无论是否有 signature）
- 保持 Gemini 模型使用 dummy signature 的行为不变
- 更新测试用例以反映新的预期行为

影响：
- 消除 thinking 相关的 400 错误
- 与现有的 thinking 禁用策略保持一致
- 不影响 Gemini 模型的 thinking 功能

测试：
- ✅ TestBuildParts_ThinkingBlockWithoutSignature 全部通过
- ✅ TestBuildTools_CustomTypeTools 全部通过

参考：Codex review 建议

* fix(gateway): 修复 count_tokens 端点 400 错误

问题分析：
- count_tokens 请求包含 thinking 块时返回 400 错误
- 原因：thinking 块未被过滤，直接转发到上游 API
- 上游 API 拒绝无效的 thinking signature

根本原因：
1. /v1/messages 请求通过 TransformClaudeToGemini 过滤 thinking 块
2. count_tokens 请求绕过转换，直接转发原始请求体
3. 导致包含无效 signature 的 thinking 块被发送到上游

修复方案：
- 创建 FilterThinkingBlocks 工具函数
- 在 buildCountTokensRequest 中应用过滤（1 行修改）
- 与 /v1/messages 行为保持一致

实现细节：
- FilterThinkingBlocks: 解析 JSON，过滤 thinking 块，重新序列化
- 失败安全：解析/序列化失败时返回原始请求体
- 性能优化：仅在发现 thinking 块时重新序列化

测试：
- ✅ 6 个单元测试全部通过
- ✅ 覆盖正常过滤、无 thinking 块、无效 JSON 等场景
- ✅ 现有测试不受影响

影响：
- 消除 count_tokens 的 400 错误
- 不影响 Antigravity 账号（仍返回模拟响应）
- 适用于所有账号类型（OAuth、API Key）

文件修改：
- backend/internal/service/gateway_request.go: +62 行（新函数）
- backend/internal/service/gateway_service.go: +2 行（应用过滤）
- backend/internal/service/gateway_request_test.go: +62 行（测试）

* fix(gateway): 增强 thinking 块过滤逻辑

基于 Codex 分析和建议的改进：

问题分析：
- 新错误：signature: Field required（signature 字段缺失）
- 旧错误：Invalid signature（signature 存在但无效）
- 两者都说明 thinking 块在请求中是危险的

Codex 建议：
- 保持 Option A：完全跳过所有 thinking 块
- 原因：thinking 块应该是只输出的，除非有服务端来源证明
- 在无状态代理中，无法安全区分上游来源 vs 客户端注入

改进内容：

1. 增强 FilterThinkingBlocks 函数
   - 过滤显式的 thinking 块：{"type":"thinking", ...}
   - 过滤无 type 的 thinking 对象：{"thinking": {...}}
   - 保留 tool_use 等其他类型块中的 thinking 字段
   - 修复：只在实际过滤时更新 content 数组

2. 扩展过滤范围
   - 将 FilterThinkingBlocks 应用到 /v1/messages 主路径
   - 之前只应用于 count_tokens，现在两个端点都过滤
   - 防止所有端点的 thinking 相关 400 错误

3. 改进测试
   - 新增：过滤无 type discriminator 的 thinking 块
   - 新增：不过滤 tool_use 中的 thinking 字段
   - 使用 containsThinkingBlock 辅助函数验证

测试：
- ✅ 8 个测试用例全部通过
- ✅ 覆盖各种 thinking 块格式
- ✅ 确保不误伤其他类型的块

影响：
- 消除 signature required 和 invalid signature 错误
- 统一 /v1/messages 和 count_tokens 的行为
- 更健壮的 thinking 块检测逻辑

参考：Codex review 和代码改进

* refactor: 根据 Codex 审查建议进行代码优化

基于 Codex 代码审查的 P1 和 P2 改进：

P1 改进（重要问题）：

1. 优化日志输出
   - 移除 thinking 块跳过时的 log.Printf
   - 避免高频请求下的日志噪音
   - 添加注释说明可通过指标监控

2. 清理遗留代码
   - 删除未使用的 isValidThoughtSignature 函数（27行）
   - 该函数在改为完全跳过 thinking 块后不再需要

P2 改进（性能优化）：

3. 添加快速路径检查
   - 在 FilterThinkingBlocks 中添加 bytes.Contains 预检查
   - 如果请求体不包含 "thinking" 字符串，直接返回
   - 避免不必要的 JSON 解析，提升性能

技术细节：
- request_transformer.go: -27行（删除函数），+1行（优化注释）
- gateway_request.go: +5行（快速路径 + bytes 导入）

测试：
- ✅ TestBuildParts_ThinkingBlockWithoutSignature 全部通过
- ✅ TestFilterThinkingBlocks 全部通过（8个测试用例）

影响：
- 减少日志噪音
- 提升性能（快速路径）
- 代码更简洁（删除未使用代码）

参考：Codex 代码审查建议

* fix: 修复 golangci-lint 检查问题

- 格式化 gateway_request_test.go
- 使用 switch 语句替代 if-else 链（staticcheck QF1003）

* fix(antigravity): 修复 thinking signature 处理并实现 Auto 模式降级

问题分析：
1. 原先代码错误地禁用了 Claude via Vertex 的 thinkingConfig
2. 历史 thinking 块的 signature 被完全跳过，导致验证失败
3. 跨模型混用时 dummy signature 会导致 400 错误

修复内容：

**request_transformer.go**：
- 删除第 38-43 行的错误逻辑（禁用 thinkingConfig）
- 引入 thoughtSignatureMode（Preserve/Dummy）策略
- Claude 模式：透传真实 signature，过滤空/dummy
- Gemini 模式：使用 dummy signature
- 支持 signature-only thinking 块
- tool_use 的 signature 也透传

**antigravity_gateway_service.go**：
- 新增 isSignatureRelatedError() 检测 signature 相关错误
- 新增 stripThinkingFromClaudeRequest() 移除 thinking 块
- 实现 Auto 模式：检测 400 + signature 关键词时自动降级重试
- 重试时完全移除 thinking 配置和消息中的 thinking 块
- 最多重试一次，避免循环

**测试**：
- 更新并新增测试覆盖 Claude preserve/Gemini dummy 模式
- 新增 tool_use signature 处理测试
- 所有测试通过（6/6）

影响：
- ✅ Claude via Vertex 可以正常使用 thinking 功能
- ✅ 历史 signature 正确透传，避免验证失败
- ✅ 跨模型混用时自动过滤无效 signature
- ✅ 错误驱动降级，自动修复 signature 问题
- ✅ 不影响纯 Claude API 和其他渠道

参考：Codex 深度分析和实现建议

* fix(lint): 修复 gofmt 格式问题

* fix(antigravity): 修复 stripThinkingFromClaudeRequest 遗漏 untyped thinking blocks

问题：
- Codex 审查指出 stripThinkingFromClaudeRequest 只移除了 type="thinking" 的块
- 没有处理没有 type 字段的 thinking 对象（如 {"thinking": "...", "signature": "..."}）
- 导致重试时仍包含无效 thinking 块，上游 400 错误持续

修复：
- 添加检查：跳过没有 type 但有 thinking 字段的块
- 现在会移除两种格式：
  1. {"type": "thinking", "thinking": "...", "signature": "..."}
  2. {"thinking": "...", "signature": "..."}（untyped）

测试：所有测试通过

参考：Codex P1 审查意见
											
										
										
											2026-01-02 17:47:49 +08:00
+										content, ok := msgMap["content"].([]any)
 										if !ok {
 											continue
 										}
 										newContent := make([]any, 0, len(content))
 										filteredThisMessage := false
-												perf(gateway): 优化负载感知调度

主要改进：
- 优化负载感知调度的准确性和响应速度
- 将 AccountUsageService 的包级缓存改为依赖注入
- 修复 SSE/JSON 转义和 nil 安全问题
- 恢复 Google One 功能兼容性

											
										
										
											2026-01-03 06:32:51 -08:00
-												Fix/multiple issues (#24)

* fix(gemini): 修复 google_one OAuth 配置和 scopes 问题

- 修复 google_one 类型在 ExchangeCode 和 RefreshToken 中使用内置客户端
- 添加 DefaultGoogleOneScopes，包含 generative-language 和 drive.readonly 权限
- 在 EffectiveOAuthConfig 中为 google_one 类型使用专门的 scopes
- 将 docker-compose.override.yml 重命名为 .example 并添加到 .gitignore
- 完善 docker-compose.override.yml.example 示例文档

解决问题：
1. google_one OAuth 授权后 API 调用返回 403 权限不足
2. 缺少访问 Gemini API 所需的 generative-language scope
3. 缺少获取 Drive 存储配额所需的 drive.readonly scope

* fix(antigravity): 完全跳过 Claude 模型的所有 thinking 块

问题分析：
- 当前代码尝试保留有 signature 的 thinking 块
- 但 Vertex AI 的 signature 是完整性令牌，无法在本地验证
- 导致 400 错误：Invalid signature in thinking block

根本原因：
1. thinking 功能已对非 Gemini 模型禁用 (isThinkingEnabled=false)
2. Vertex AI 要求原样重放 (thinking, signature) 对或完全不发送
3. 本地无法复制 Vertex 的加密验证逻辑

修复方案：
- 对 Claude 模型完全跳过所有 thinking 块（无论是否有 signature）
- 保持 Gemini 模型使用 dummy signature 的行为不变
- 更新测试用例以反映新的预期行为

影响：
- 消除 thinking 相关的 400 错误
- 与现有的 thinking 禁用策略保持一致
- 不影响 Gemini 模型的 thinking 功能

测试：
- ✅ TestBuildParts_ThinkingBlockWithoutSignature 全部通过
- ✅ TestBuildTools_CustomTypeTools 全部通过

参考：Codex review 建议

* fix(gateway): 修复 count_tokens 端点 400 错误

问题分析：
- count_tokens 请求包含 thinking 块时返回 400 错误
- 原因：thinking 块未被过滤，直接转发到上游 API
- 上游 API 拒绝无效的 thinking signature

根本原因：
1. /v1/messages 请求通过 TransformClaudeToGemini 过滤 thinking 块
2. count_tokens 请求绕过转换，直接转发原始请求体
3. 导致包含无效 signature 的 thinking 块被发送到上游

修复方案：
- 创建 FilterThinkingBlocks 工具函数
- 在 buildCountTokensRequest 中应用过滤（1 行修改）
- 与 /v1/messages 行为保持一致

实现细节：
- FilterThinkingBlocks: 解析 JSON，过滤 thinking 块，重新序列化
- 失败安全：解析/序列化失败时返回原始请求体
- 性能优化：仅在发现 thinking 块时重新序列化

测试：
- ✅ 6 个单元测试全部通过
- ✅ 覆盖正常过滤、无 thinking 块、无效 JSON 等场景
- ✅ 现有测试不受影响

影响：
- 消除 count_tokens 的 400 错误
- 不影响 Antigravity 账号（仍返回模拟响应）
- 适用于所有账号类型（OAuth、API Key）

文件修改：
- backend/internal/service/gateway_request.go: +62 行（新函数）
- backend/internal/service/gateway_service.go: +2 行（应用过滤）
- backend/internal/service/gateway_request_test.go: +62 行（测试）

* fix(gateway): 增强 thinking 块过滤逻辑

基于 Codex 分析和建议的改进：

问题分析：
- 新错误：signature: Field required（signature 字段缺失）
- 旧错误：Invalid signature（signature 存在但无效）
- 两者都说明 thinking 块在请求中是危险的

Codex 建议：
- 保持 Option A：完全跳过所有 thinking 块
- 原因：thinking 块应该是只输出的，除非有服务端来源证明
- 在无状态代理中，无法安全区分上游来源 vs 客户端注入

改进内容：

1. 增强 FilterThinkingBlocks 函数
   - 过滤显式的 thinking 块：{"type":"thinking", ...}
   - 过滤无 type 的 thinking 对象：{"thinking": {...}}
   - 保留 tool_use 等其他类型块中的 thinking 字段
   - 修复：只在实际过滤时更新 content 数组

2. 扩展过滤范围
   - 将 FilterThinkingBlocks 应用到 /v1/messages 主路径
   - 之前只应用于 count_tokens，现在两个端点都过滤
   - 防止所有端点的 thinking 相关 400 错误

3. 改进测试
   - 新增：过滤无 type discriminator 的 thinking 块
   - 新增：不过滤 tool_use 中的 thinking 字段
   - 使用 containsThinkingBlock 辅助函数验证

测试：
- ✅ 8 个测试用例全部通过
- ✅ 覆盖各种 thinking 块格式
- ✅ 确保不误伤其他类型的块

影响：
- 消除 signature required 和 invalid signature 错误
- 统一 /v1/messages 和 count_tokens 的行为
- 更健壮的 thinking 块检测逻辑

参考：Codex review 和代码改进

* refactor: 根据 Codex 审查建议进行代码优化

基于 Codex 代码审查的 P1 和 P2 改进：

P1 改进（重要问题）：

1. 优化日志输出
   - 移除 thinking 块跳过时的 log.Printf
   - 避免高频请求下的日志噪音
   - 添加注释说明可通过指标监控

2. 清理遗留代码
   - 删除未使用的 isValidThoughtSignature 函数（27行）
   - 该函数在改为完全跳过 thinking 块后不再需要

P2 改进（性能优化）：

3. 添加快速路径检查
   - 在 FilterThinkingBlocks 中添加 bytes.Contains 预检查
   - 如果请求体不包含 "thinking" 字符串，直接返回
   - 避免不必要的 JSON 解析，提升性能

技术细节：
- request_transformer.go: -27行（删除函数），+1行（优化注释）
- gateway_request.go: +5行（快速路径 + bytes 导入）

测试：
- ✅ TestBuildParts_ThinkingBlockWithoutSignature 全部通过
- ✅ TestFilterThinkingBlocks 全部通过（8个测试用例）

影响：
- 减少日志噪音
- 提升性能（快速路径）
- 代码更简洁（删除未使用代码）

参考：Codex 代码审查建议

* fix: 修复 golangci-lint 检查问题

- 格式化 gateway_request_test.go
- 使用 switch 语句替代 if-else 链（staticcheck QF1003）

* fix(antigravity): 修复 thinking signature 处理并实现 Auto 模式降级

问题分析：
1. 原先代码错误地禁用了 Claude via Vertex 的 thinkingConfig
2. 历史 thinking 块的 signature 被完全跳过，导致验证失败
3. 跨模型混用时 dummy signature 会导致 400 错误

修复内容：

**request_transformer.go**：
- 删除第 38-43 行的错误逻辑（禁用 thinkingConfig）
- 引入 thoughtSignatureMode（Preserve/Dummy）策略
- Claude 模式：透传真实 signature，过滤空/dummy
- Gemini 模式：使用 dummy signature
- 支持 signature-only thinking 块
- tool_use 的 signature 也透传

**antigravity_gateway_service.go**：
- 新增 isSignatureRelatedError() 检测 signature 相关错误
- 新增 stripThinkingFromClaudeRequest() 移除 thinking 块
- 实现 Auto 模式：检测 400 + signature 关键词时自动降级重试
- 重试时完全移除 thinking 配置和消息中的 thinking 块
- 最多重试一次，避免循环

**测试**：
- 更新并新增测试覆盖 Claude preserve/Gemini dummy 模式
- 新增 tool_use signature 处理测试
- 所有测试通过（6/6）

影响：
- ✅ Claude via Vertex 可以正常使用 thinking 功能
- ✅ 历史 signature 正确透传，避免验证失败
- ✅ 跨模型混用时自动过滤无效 signature
- ✅ 错误驱动降级，自动修复 signature 问题
- ✅ 不影响纯 Claude API 和其他渠道

参考：Codex 深度分析和实现建议

* fix(lint): 修复 gofmt 格式问题

* fix(antigravity): 修复 stripThinkingFromClaudeRequest 遗漏 untyped thinking blocks

问题：
- Codex 审查指出 stripThinkingFromClaudeRequest 只移除了 type="thinking" 的块
- 没有处理没有 type 字段的 thinking 对象（如 {"thinking": "...", "signature": "..."}）
- 导致重试时仍包含无效 thinking 块，上游 400 错误持续

修复：
- 添加检查：跳过没有 type 但有 thinking 字段的块
- 现在会移除两种格式：
  1. {"type": "thinking", "thinking": "...", "signature": "..."}
  2. {"thinking": "...", "signature": "..."}（untyped）

测试：所有测试通过

参考：Codex P1 审查意见
											
										
										
											2026-01-02 17:47:49 +08:00
+										for _, block := range content {
 											blockMap, ok := block.(map[string]any)
 											if !ok {
 												newContent = append(newContent, block)
 												continue
 											}
 											blockType, _ := blockMap["type"].(string)
-												perf(gateway): 优化负载感知调度

主要改进：
- 优化负载感知调度的准确性和响应速度
- 将 AccountUsageService 的包级缓存改为依赖注入
- 修复 SSE/JSON 转义和 nil 安全问题
- 恢复 Google One 功能兼容性

											
										
										
											2026-01-03 06:32:51 -08:00
 											if blockType == "thinking" || blockType == "redacted_thinking" {
 												// When thinking is enabled and this is an assistant message,
-												fix(thinking): 优化 thinking block 签名错误重试逻辑

- FilterThinkingBlocksForRetry: 将 thinking block 转换为 text block 而非直接删除
- stripThinkingFromClaudeRequest: Antigravity 网关同步采用转换策略
- 统一处理 thinking/redacted_thinking/无 type 字段的 thinking block
- 保留 thinking 内容，避免上下文丢失

											
										
										
											2026-01-03 17:07:54 -08:00
+												// only keep thinking blocks with valid signatures
-												perf(gateway): 优化负载感知调度

主要改进：
- 优化负载感知调度的准确性和响应速度
- 将 AccountUsageService 的包级缓存改为依赖注入
- 修复 SSE/JSON 转义和 nil 安全问题
- 恢复 Google One 功能兼容性

											
										
										
											2026-01-03 06:32:51 -08:00
+												if thinkingEnabled && role == "assistant" {
 													signature, _ := blockMap["signature"].(string)
-												feat(antigravity): comprehensive enhancements - model mapping, rate limiting, scheduling & ops

Key changes:
- Upgrade model mapping: Opus 4.5 → Opus 4.6-thinking with precise matching
- Unified rate limiting: scope-level → model-level with Redis snapshot sync
- Load-balanced scheduling by call count with smart retry mechanism
- Force cache billing support
- Model identity injection in prompts with leak prevention
- Thinking mode auto-handling (max_tokens/budget_tokens fix)
- Frontend: whitelist mode toggle, model mapping validation, status indicators
- Gemini session fallback with Redis Trie O(L) matching
- Ops: enhanced concurrency monitoring, account availability, retry logic
- Migration scripts: 049-051 for model mapping unification

											
										
										
											2026-02-07 12:31:10 +08:00
+													if signature != "" && signature != antigravity.DummyThoughtSignature {
-												perf(gateway): 优化负载感知调度

主要改进：
- 优化负载感知调度的准确性和响应速度
- 将 AccountUsageService 的包级缓存改为依赖注入
- 修复 SSE/JSON 转义和 nil 安全问题
- 恢复 Google One 功能兼容性

											
										
										
											2026-01-03 06:32:51 -08:00
+														newContent = append(newContent, block)
 														continue
 													}
 												}
-												Fix/multiple issues (#24)

* fix(gemini): 修复 google_one OAuth 配置和 scopes 问题

- 修复 google_one 类型在 ExchangeCode 和 RefreshToken 中使用内置客户端
- 添加 DefaultGoogleOneScopes，包含 generative-language 和 drive.readonly 权限
- 在 EffectiveOAuthConfig 中为 google_one 类型使用专门的 scopes
- 将 docker-compose.override.yml 重命名为 .example 并添加到 .gitignore
- 完善 docker-compose.override.yml.example 示例文档

解决问题：
1. google_one OAuth 授权后 API 调用返回 403 权限不足
2. 缺少访问 Gemini API 所需的 generative-language scope
3. 缺少获取 Drive 存储配额所需的 drive.readonly scope

* fix(antigravity): 完全跳过 Claude 模型的所有 thinking 块

问题分析：
- 当前代码尝试保留有 signature 的 thinking 块
- 但 Vertex AI 的 signature 是完整性令牌，无法在本地验证
- 导致 400 错误：Invalid signature in thinking block

根本原因：
1. thinking 功能已对非 Gemini 模型禁用 (isThinkingEnabled=false)
2. Vertex AI 要求原样重放 (thinking, signature) 对或完全不发送
3. 本地无法复制 Vertex 的加密验证逻辑

修复方案：
- 对 Claude 模型完全跳过所有 thinking 块（无论是否有 signature）
- 保持 Gemini 模型使用 dummy signature 的行为不变
- 更新测试用例以反映新的预期行为

影响：
- 消除 thinking 相关的 400 错误
- 与现有的 thinking 禁用策略保持一致
- 不影响 Gemini 模型的 thinking 功能

测试：
- ✅ TestBuildParts_ThinkingBlockWithoutSignature 全部通过
- ✅ TestBuildTools_CustomTypeTools 全部通过

参考：Codex review 建议

* fix(gateway): 修复 count_tokens 端点 400 错误

问题分析：
- count_tokens 请求包含 thinking 块时返回 400 错误
- 原因：thinking 块未被过滤，直接转发到上游 API
- 上游 API 拒绝无效的 thinking signature

根本原因：
1. /v1/messages 请求通过 TransformClaudeToGemini 过滤 thinking 块
2. count_tokens 请求绕过转换，直接转发原始请求体
3. 导致包含无效 signature 的 thinking 块被发送到上游

修复方案：
- 创建 FilterThinkingBlocks 工具函数
- 在 buildCountTokensRequest 中应用过滤（1 行修改）
- 与 /v1/messages 行为保持一致

实现细节：
- FilterThinkingBlocks: 解析 JSON，过滤 thinking 块，重新序列化
- 失败安全：解析/序列化失败时返回原始请求体
- 性能优化：仅在发现 thinking 块时重新序列化

测试：
- ✅ 6 个单元测试全部通过
- ✅ 覆盖正常过滤、无 thinking 块、无效 JSON 等场景
- ✅ 现有测试不受影响

影响：
- 消除 count_tokens 的 400 错误
- 不影响 Antigravity 账号（仍返回模拟响应）
- 适用于所有账号类型（OAuth、API Key）

文件修改：
- backend/internal/service/gateway_request.go: +62 行（新函数）
- backend/internal/service/gateway_service.go: +2 行（应用过滤）
- backend/internal/service/gateway_request_test.go: +62 行（测试）

* fix(gateway): 增强 thinking 块过滤逻辑

基于 Codex 分析和建议的改进：

问题分析：
- 新错误：signature: Field required（signature 字段缺失）
- 旧错误：Invalid signature（signature 存在但无效）
- 两者都说明 thinking 块在请求中是危险的

Codex 建议：
- 保持 Option A：完全跳过所有 thinking 块
- 原因：thinking 块应该是只输出的，除非有服务端来源证明
- 在无状态代理中，无法安全区分上游来源 vs 客户端注入

改进内容：

1. 增强 FilterThinkingBlocks 函数
   - 过滤显式的 thinking 块：{"type":"thinking", ...}
   - 过滤无 type 的 thinking 对象：{"thinking": {...}}
   - 保留 tool_use 等其他类型块中的 thinking 字段
   - 修复：只在实际过滤时更新 content 数组

2. 扩展过滤范围
   - 将 FilterThinkingBlocks 应用到 /v1/messages 主路径
   - 之前只应用于 count_tokens，现在两个端点都过滤
   - 防止所有端点的 thinking 相关 400 错误

3. 改进测试
   - 新增：过滤无 type discriminator 的 thinking 块
   - 新增：不过滤 tool_use 中的 thinking 字段
   - 使用 containsThinkingBlock 辅助函数验证

测试：
- ✅ 8 个测试用例全部通过
- ✅ 覆盖各种 thinking 块格式
- ✅ 确保不误伤其他类型的块

影响：
- 消除 signature required 和 invalid signature 错误
- 统一 /v1/messages 和 count_tokens 的行为
- 更健壮的 thinking 块检测逻辑

参考：Codex review 和代码改进

* refactor: 根据 Codex 审查建议进行代码优化

基于 Codex 代码审查的 P1 和 P2 改进：

P1 改进（重要问题）：

1. 优化日志输出
   - 移除 thinking 块跳过时的 log.Printf
   - 避免高频请求下的日志噪音
   - 添加注释说明可通过指标监控

2. 清理遗留代码
   - 删除未使用的 isValidThoughtSignature 函数（27行）
   - 该函数在改为完全跳过 thinking 块后不再需要

P2 改进（性能优化）：

3. 添加快速路径检查
   - 在 FilterThinkingBlocks 中添加 bytes.Contains 预检查
   - 如果请求体不包含 "thinking" 字符串，直接返回
   - 避免不必要的 JSON 解析，提升性能

技术细节：
- request_transformer.go: -27行（删除函数），+1行（优化注释）
- gateway_request.go: +5行（快速路径 + bytes 导入）

测试：
- ✅ TestBuildParts_ThinkingBlockWithoutSignature 全部通过
- ✅ TestFilterThinkingBlocks 全部通过（8个测试用例）

影响：
- 减少日志噪音
- 提升性能（快速路径）
- 代码更简洁（删除未使用代码）

参考：Codex 代码审查建议

* fix: 修复 golangci-lint 检查问题

- 格式化 gateway_request_test.go
- 使用 switch 语句替代 if-else 链（staticcheck QF1003）

* fix(antigravity): 修复 thinking signature 处理并实现 Auto 模式降级

问题分析：
1. 原先代码错误地禁用了 Claude via Vertex 的 thinkingConfig
2. 历史 thinking 块的 signature 被完全跳过，导致验证失败
3. 跨模型混用时 dummy signature 会导致 400 错误

修复内容：

**request_transformer.go**：
- 删除第 38-43 行的错误逻辑（禁用 thinkingConfig）
- 引入 thoughtSignatureMode（Preserve/Dummy）策略
- Claude 模式：透传真实 signature，过滤空/dummy
- Gemini 模式：使用 dummy signature
- 支持 signature-only thinking 块
- tool_use 的 signature 也透传

**antigravity_gateway_service.go**：
- 新增 isSignatureRelatedError() 检测 signature 相关错误
- 新增 stripThinkingFromClaudeRequest() 移除 thinking 块
- 实现 Auto 模式：检测 400 + signature 关键词时自动降级重试
- 重试时完全移除 thinking 配置和消息中的 thinking 块
- 最多重试一次，避免循环

**测试**：
- 更新并新增测试覆盖 Claude preserve/Gemini dummy 模式
- 新增 tool_use signature 处理测试
- 所有测试通过（6/6）

影响：
- ✅ Claude via Vertex 可以正常使用 thinking 功能
- ✅ 历史 signature 正确透传，避免验证失败
- ✅ 跨模型混用时自动过滤无效 signature
- ✅ 错误驱动降级，自动修复 signature 问题
- ✅ 不影响纯 Claude API 和其他渠道

参考：Codex 深度分析和实现建议

* fix(lint): 修复 gofmt 格式问题

* fix(antigravity): 修复 stripThinkingFromClaudeRequest 遗漏 untyped thinking blocks

问题：
- Codex 审查指出 stripThinkingFromClaudeRequest 只移除了 type="thinking" 的块
- 没有处理没有 type 字段的 thinking 对象（如 {"thinking": "...", "signature": "..."}）
- 导致重试时仍包含无效 thinking 块，上游 400 错误持续

修复：
- 添加检查：跳过没有 type 但有 thinking 字段的块
- 现在会移除两种格式：
  1. {"type": "thinking", "thinking": "...", "signature": "..."}
  2. {"thinking": "...", "signature": "..."}（untyped）

测试：所有测试通过

参考：Codex P1 审查意见
											
										
										
											2026-01-02 17:47:49 +08:00
+												filtered = true
 												filteredThisMessage = true
-												perf(gateway): 优化负载感知调度

主要改进：
- 优化负载感知调度的准确性和响应速度
- 将 AccountUsageService 的包级缓存改为依赖注入
- 修复 SSE/JSON 转义和 nil 安全问题
- 恢复 Google One 功能兼容性

											
										
										
											2026-01-03 06:32:51 -08:00
+												continue
-												Fix/multiple issues (#24)

* fix(gemini): 修复 google_one OAuth 配置和 scopes 问题

- 修复 google_one 类型在 ExchangeCode 和 RefreshToken 中使用内置客户端
- 添加 DefaultGoogleOneScopes，包含 generative-language 和 drive.readonly 权限
- 在 EffectiveOAuthConfig 中为 google_one 类型使用专门的 scopes
- 将 docker-compose.override.yml 重命名为 .example 并添加到 .gitignore
- 完善 docker-compose.override.yml.example 示例文档

解决问题：
1. google_one OAuth 授权后 API 调用返回 403 权限不足
2. 缺少访问 Gemini API 所需的 generative-language scope
3. 缺少获取 Drive 存储配额所需的 drive.readonly scope

* fix(antigravity): 完全跳过 Claude 模型的所有 thinking 块

问题分析：
- 当前代码尝试保留有 signature 的 thinking 块
- 但 Vertex AI 的 signature 是完整性令牌，无法在本地验证
- 导致 400 错误：Invalid signature in thinking block

根本原因：
1. thinking 功能已对非 Gemini 模型禁用 (isThinkingEnabled=false)
2. Vertex AI 要求原样重放 (thinking, signature) 对或完全不发送
3. 本地无法复制 Vertex 的加密验证逻辑

修复方案：
- 对 Claude 模型完全跳过所有 thinking 块（无论是否有 signature）
- 保持 Gemini 模型使用 dummy signature 的行为不变
- 更新测试用例以反映新的预期行为

影响：
- 消除 thinking 相关的 400 错误
- 与现有的 thinking 禁用策略保持一致
- 不影响 Gemini 模型的 thinking 功能

测试：
- ✅ TestBuildParts_ThinkingBlockWithoutSignature 全部通过
- ✅ TestBuildTools_CustomTypeTools 全部通过

参考：Codex review 建议

* fix(gateway): 修复 count_tokens 端点 400 错误

问题分析：
- count_tokens 请求包含 thinking 块时返回 400 错误
- 原因：thinking 块未被过滤，直接转发到上游 API
- 上游 API 拒绝无效的 thinking signature

根本原因：
1. /v1/messages 请求通过 TransformClaudeToGemini 过滤 thinking 块
2. count_tokens 请求绕过转换，直接转发原始请求体
3. 导致包含无效 signature 的 thinking 块被发送到上游

修复方案：
- 创建 FilterThinkingBlocks 工具函数
- 在 buildCountTokensRequest 中应用过滤（1 行修改）
- 与 /v1/messages 行为保持一致

实现细节：
- FilterThinkingBlocks: 解析 JSON，过滤 thinking 块，重新序列化
- 失败安全：解析/序列化失败时返回原始请求体
- 性能优化：仅在发现 thinking 块时重新序列化

测试：
- ✅ 6 个单元测试全部通过
- ✅ 覆盖正常过滤、无 thinking 块、无效 JSON 等场景
- ✅ 现有测试不受影响

影响：
- 消除 count_tokens 的 400 错误
- 不影响 Antigravity 账号（仍返回模拟响应）
- 适用于所有账号类型（OAuth、API Key）

文件修改：
- backend/internal/service/gateway_request.go: +62 行（新函数）
- backend/internal/service/gateway_service.go: +2 行（应用过滤）
- backend/internal/service/gateway_request_test.go: +62 行（测试）

* fix(gateway): 增强 thinking 块过滤逻辑

基于 Codex 分析和建议的改进：

问题分析：
- 新错误：signature: Field required（signature 字段缺失）
- 旧错误：Invalid signature（signature 存在但无效）
- 两者都说明 thinking 块在请求中是危险的

Codex 建议：
- 保持 Option A：完全跳过所有 thinking 块
- 原因：thinking 块应该是只输出的，除非有服务端来源证明
- 在无状态代理中，无法安全区分上游来源 vs 客户端注入

改进内容：

1. 增强 FilterThinkingBlocks 函数
   - 过滤显式的 thinking 块：{"type":"thinking", ...}
   - 过滤无 type 的 thinking 对象：{"thinking": {...}}
   - 保留 tool_use 等其他类型块中的 thinking 字段
   - 修复：只在实际过滤时更新 content 数组

2. 扩展过滤范围
   - 将 FilterThinkingBlocks 应用到 /v1/messages 主路径
   - 之前只应用于 count_tokens，现在两个端点都过滤
   - 防止所有端点的 thinking 相关 400 错误

3. 改进测试
   - 新增：过滤无 type discriminator 的 thinking 块
   - 新增：不过滤 tool_use 中的 thinking 字段
   - 使用 containsThinkingBlock 辅助函数验证

测试：
- ✅ 8 个测试用例全部通过
- ✅ 覆盖各种 thinking 块格式
- ✅ 确保不误伤其他类型的块

影响：
- 消除 signature required 和 invalid signature 错误
- 统一 /v1/messages 和 count_tokens 的行为
- 更健壮的 thinking 块检测逻辑

参考：Codex review 和代码改进

* refactor: 根据 Codex 审查建议进行代码优化

基于 Codex 代码审查的 P1 和 P2 改进：

P1 改进（重要问题）：

1. 优化日志输出
   - 移除 thinking 块跳过时的 log.Printf
   - 避免高频请求下的日志噪音
   - 添加注释说明可通过指标监控

2. 清理遗留代码
   - 删除未使用的 isValidThoughtSignature 函数（27行）
   - 该函数在改为完全跳过 thinking 块后不再需要

P2 改进（性能优化）：

3. 添加快速路径检查
   - 在 FilterThinkingBlocks 中添加 bytes.Contains 预检查
   - 如果请求体不包含 "thinking" 字符串，直接返回
   - 避免不必要的 JSON 解析，提升性能

技术细节：
- request_transformer.go: -27行（删除函数），+1行（优化注释）
- gateway_request.go: +5行（快速路径 + bytes 导入）

测试：
- ✅ TestBuildParts_ThinkingBlockWithoutSignature 全部通过
- ✅ TestFilterThinkingBlocks 全部通过（8个测试用例）

影响：
- 减少日志噪音
- 提升性能（快速路径）
- 代码更简洁（删除未使用代码）

参考：Codex 代码审查建议

* fix: 修复 golangci-lint 检查问题

- 格式化 gateway_request_test.go
- 使用 switch 语句替代 if-else 链（staticcheck QF1003）

* fix(antigravity): 修复 thinking signature 处理并实现 Auto 模式降级

问题分析：
1. 原先代码错误地禁用了 Claude via Vertex 的 thinkingConfig
2. 历史 thinking 块的 signature 被完全跳过，导致验证失败
3. 跨模型混用时 dummy signature 会导致 400 错误

修复内容：

**request_transformer.go**：
- 删除第 38-43 行的错误逻辑（禁用 thinkingConfig）
- 引入 thoughtSignatureMode（Preserve/Dummy）策略
- Claude 模式：透传真实 signature，过滤空/dummy
- Gemini 模式：使用 dummy signature
- 支持 signature-only thinking 块
- tool_use 的 signature 也透传

**antigravity_gateway_service.go**：
- 新增 isSignatureRelatedError() 检测 signature 相关错误
- 新增 stripThinkingFromClaudeRequest() 移除 thinking 块
- 实现 Auto 模式：检测 400 + signature 关键词时自动降级重试
- 重试时完全移除 thinking 配置和消息中的 thinking 块
- 最多重试一次，避免循环

**测试**：
- 更新并新增测试覆盖 Claude preserve/Gemini dummy 模式
- 新增 tool_use signature 处理测试
- 所有测试通过（6/6）

影响：
- ✅ Claude via Vertex 可以正常使用 thinking 功能
- ✅ 历史 signature 正确透传，避免验证失败
- ✅ 跨模型混用时自动过滤无效 signature
- ✅ 错误驱动降级，自动修复 signature 问题
- ✅ 不影响纯 Claude API 和其他渠道

参考：Codex 深度分析和实现建议

* fix(lint): 修复 gofmt 格式问题

* fix(antigravity): 修复 stripThinkingFromClaudeRequest 遗漏 untyped thinking blocks

问题：
- Codex 审查指出 stripThinkingFromClaudeRequest 只移除了 type="thinking" 的块
- 没有处理没有 type 字段的 thinking 对象（如 {"thinking": "...", "signature": "..."}）
- 导致重试时仍包含无效 thinking 块，上游 400 错误持续

修复：
- 添加检查：跳过没有 type 但有 thinking 字段的块
- 现在会移除两种格式：
  1. {"type": "thinking", "thinking": "...", "signature": "..."}
  2. {"thinking": "...", "signature": "..."}（untyped）

测试：所有测试通过

参考：Codex P1 审查意见
											
										
										
											2026-01-02 17:47:49 +08:00
+											}
-												fix(thinking): 优化 thinking block 签名错误重试逻辑

- FilterThinkingBlocksForRetry: 将 thinking block 转换为 text block 而非直接删除
- stripThinkingFromClaudeRequest: Antigravity 网关同步采用转换策略
- 统一处理 thinking/redacted_thinking/无 type 字段的 thinking block
- 保留 thinking 内容，避免上下文丢失

											
										
										
											2026-01-03 17:07:54 -08:00
+											// Handle blocks without type discriminator but with "thinking" key
-												Fix/multiple issues (#24)

* fix(gemini): 修复 google_one OAuth 配置和 scopes 问题

- 修复 google_one 类型在 ExchangeCode 和 RefreshToken 中使用内置客户端
- 添加 DefaultGoogleOneScopes，包含 generative-language 和 drive.readonly 权限
- 在 EffectiveOAuthConfig 中为 google_one 类型使用专门的 scopes
- 将 docker-compose.override.yml 重命名为 .example 并添加到 .gitignore
- 完善 docker-compose.override.yml.example 示例文档

解决问题：
1. google_one OAuth 授权后 API 调用返回 403 权限不足
2. 缺少访问 Gemini API 所需的 generative-language scope
3. 缺少获取 Drive 存储配额所需的 drive.readonly scope

* fix(antigravity): 完全跳过 Claude 模型的所有 thinking 块

问题分析：
- 当前代码尝试保留有 signature 的 thinking 块
- 但 Vertex AI 的 signature 是完整性令牌，无法在本地验证
- 导致 400 错误：Invalid signature in thinking block

根本原因：
1. thinking 功能已对非 Gemini 模型禁用 (isThinkingEnabled=false)
2. Vertex AI 要求原样重放 (thinking, signature) 对或完全不发送
3. 本地无法复制 Vertex 的加密验证逻辑

修复方案：
- 对 Claude 模型完全跳过所有 thinking 块（无论是否有 signature）
- 保持 Gemini 模型使用 dummy signature 的行为不变
- 更新测试用例以反映新的预期行为

影响：
- 消除 thinking 相关的 400 错误
- 与现有的 thinking 禁用策略保持一致
- 不影响 Gemini 模型的 thinking 功能

测试：
- ✅ TestBuildParts_ThinkingBlockWithoutSignature 全部通过
- ✅ TestBuildTools_CustomTypeTools 全部通过

参考：Codex review 建议

* fix(gateway): 修复 count_tokens 端点 400 错误

问题分析：
- count_tokens 请求包含 thinking 块时返回 400 错误
- 原因：thinking 块未被过滤，直接转发到上游 API
- 上游 API 拒绝无效的 thinking signature

根本原因：
1. /v1/messages 请求通过 TransformClaudeToGemini 过滤 thinking 块
2. count_tokens 请求绕过转换，直接转发原始请求体
3. 导致包含无效 signature 的 thinking 块被发送到上游

修复方案：
- 创建 FilterThinkingBlocks 工具函数
- 在 buildCountTokensRequest 中应用过滤（1 行修改）
- 与 /v1/messages 行为保持一致

实现细节：
- FilterThinkingBlocks: 解析 JSON，过滤 thinking 块，重新序列化
- 失败安全：解析/序列化失败时返回原始请求体
- 性能优化：仅在发现 thinking 块时重新序列化

测试：
- ✅ 6 个单元测试全部通过
- ✅ 覆盖正常过滤、无 thinking 块、无效 JSON 等场景
- ✅ 现有测试不受影响

影响：
- 消除 count_tokens 的 400 错误
- 不影响 Antigravity 账号（仍返回模拟响应）
- 适用于所有账号类型（OAuth、API Key）

文件修改：
- backend/internal/service/gateway_request.go: +62 行（新函数）
- backend/internal/service/gateway_service.go: +2 行（应用过滤）
- backend/internal/service/gateway_request_test.go: +62 行（测试）

* fix(gateway): 增强 thinking 块过滤逻辑

基于 Codex 分析和建议的改进：

问题分析：
- 新错误：signature: Field required（signature 字段缺失）
- 旧错误：Invalid signature（signature 存在但无效）
- 两者都说明 thinking 块在请求中是危险的

Codex 建议：
- 保持 Option A：完全跳过所有 thinking 块
- 原因：thinking 块应该是只输出的，除非有服务端来源证明
- 在无状态代理中，无法安全区分上游来源 vs 客户端注入

改进内容：

1. 增强 FilterThinkingBlocks 函数
   - 过滤显式的 thinking 块：{"type":"thinking", ...}
   - 过滤无 type 的 thinking 对象：{"thinking": {...}}
   - 保留 tool_use 等其他类型块中的 thinking 字段
   - 修复：只在实际过滤时更新 content 数组

2. 扩展过滤范围
   - 将 FilterThinkingBlocks 应用到 /v1/messages 主路径
   - 之前只应用于 count_tokens，现在两个端点都过滤
   - 防止所有端点的 thinking 相关 400 错误

3. 改进测试
   - 新增：过滤无 type discriminator 的 thinking 块
   - 新增：不过滤 tool_use 中的 thinking 字段
   - 使用 containsThinkingBlock 辅助函数验证

测试：
- ✅ 8 个测试用例全部通过
- ✅ 覆盖各种 thinking 块格式
- ✅ 确保不误伤其他类型的块

影响：
- 消除 signature required 和 invalid signature 错误
- 统一 /v1/messages 和 count_tokens 的行为
- 更健壮的 thinking 块检测逻辑

参考：Codex review 和代码改进

* refactor: 根据 Codex 审查建议进行代码优化

基于 Codex 代码审查的 P1 和 P2 改进：

P1 改进（重要问题）：

1. 优化日志输出
   - 移除 thinking 块跳过时的 log.Printf
   - 避免高频请求下的日志噪音
   - 添加注释说明可通过指标监控

2. 清理遗留代码
   - 删除未使用的 isValidThoughtSignature 函数（27行）
   - 该函数在改为完全跳过 thinking 块后不再需要

P2 改进（性能优化）：

3. 添加快速路径检查
   - 在 FilterThinkingBlocks 中添加 bytes.Contains 预检查
   - 如果请求体不包含 "thinking" 字符串，直接返回
   - 避免不必要的 JSON 解析，提升性能

技术细节：
- request_transformer.go: -27行（删除函数），+1行（优化注释）
- gateway_request.go: +5行（快速路径 + bytes 导入）

测试：
- ✅ TestBuildParts_ThinkingBlockWithoutSignature 全部通过
- ✅ TestFilterThinkingBlocks 全部通过（8个测试用例）

影响：
- 减少日志噪音
- 提升性能（快速路径）
- 代码更简洁（删除未使用代码）

参考：Codex 代码审查建议

* fix: 修复 golangci-lint 检查问题

- 格式化 gateway_request_test.go
- 使用 switch 语句替代 if-else 链（staticcheck QF1003）

* fix(antigravity): 修复 thinking signature 处理并实现 Auto 模式降级

问题分析：
1. 原先代码错误地禁用了 Claude via Vertex 的 thinkingConfig
2. 历史 thinking 块的 signature 被完全跳过，导致验证失败
3. 跨模型混用时 dummy signature 会导致 400 错误

修复内容：

**request_transformer.go**：
- 删除第 38-43 行的错误逻辑（禁用 thinkingConfig）
- 引入 thoughtSignatureMode（Preserve/Dummy）策略
- Claude 模式：透传真实 signature，过滤空/dummy
- Gemini 模式：使用 dummy signature
- 支持 signature-only thinking 块
- tool_use 的 signature 也透传

**antigravity_gateway_service.go**：
- 新增 isSignatureRelatedError() 检测 signature 相关错误
- 新增 stripThinkingFromClaudeRequest() 移除 thinking 块
- 实现 Auto 模式：检测 400 + signature 关键词时自动降级重试
- 重试时完全移除 thinking 配置和消息中的 thinking 块
- 最多重试一次，避免循环

**测试**：
- 更新并新增测试覆盖 Claude preserve/Gemini dummy 模式
- 新增 tool_use signature 处理测试
- 所有测试通过（6/6）

影响：
- ✅ Claude via Vertex 可以正常使用 thinking 功能
- ✅ 历史 signature 正确透传，避免验证失败
- ✅ 跨模型混用时自动过滤无效 signature
- ✅ 错误驱动降级，自动修复 signature 问题
- ✅ 不影响纯 Claude API 和其他渠道

参考：Codex 深度分析和实现建议

* fix(lint): 修复 gofmt 格式问题

* fix(antigravity): 修复 stripThinkingFromClaudeRequest 遗漏 untyped thinking blocks

问题：
- Codex 审查指出 stripThinkingFromClaudeRequest 只移除了 type="thinking" 的块
- 没有处理没有 type 字段的 thinking 对象（如 {"thinking": "...", "signature": "..."}）
- 导致重试时仍包含无效 thinking 块，上游 400 错误持续

修复：
- 添加检查：跳过没有 type 但有 thinking 字段的块
- 现在会移除两种格式：
  1. {"type": "thinking", "thinking": "...", "signature": "..."}
  2. {"thinking": "...", "signature": "..."}（untyped）

测试：所有测试通过

参考：Codex P1 审查意见
											
										
										
											2026-01-02 17:47:49 +08:00
+											if blockType == "" {
-												fix(thinking): 优化 thinking block 签名错误重试逻辑

- FilterThinkingBlocksForRetry: 将 thinking block 转换为 text block 而非直接删除
- stripThinkingFromClaudeRequest: Antigravity 网关同步采用转换策略
- 统一处理 thinking/redacted_thinking/无 type 字段的 thinking block
- 保留 thinking 内容，避免上下文丢失

											
										
										
											2026-01-03 17:07:54 -08:00
+												if _, hasThinking := blockMap["thinking"]; hasThinking {
-												Fix/multiple issues (#24)

* fix(gemini): 修复 google_one OAuth 配置和 scopes 问题

- 修复 google_one 类型在 ExchangeCode 和 RefreshToken 中使用内置客户端
- 添加 DefaultGoogleOneScopes，包含 generative-language 和 drive.readonly 权限
- 在 EffectiveOAuthConfig 中为 google_one 类型使用专门的 scopes
- 将 docker-compose.override.yml 重命名为 .example 并添加到 .gitignore
- 完善 docker-compose.override.yml.example 示例文档

解决问题：
1. google_one OAuth 授权后 API 调用返回 403 权限不足
2. 缺少访问 Gemini API 所需的 generative-language scope
3. 缺少获取 Drive 存储配额所需的 drive.readonly scope

* fix(antigravity): 完全跳过 Claude 模型的所有 thinking 块

问题分析：
- 当前代码尝试保留有 signature 的 thinking 块
- 但 Vertex AI 的 signature 是完整性令牌，无法在本地验证
- 导致 400 错误：Invalid signature in thinking block

根本原因：
1. thinking 功能已对非 Gemini 模型禁用 (isThinkingEnabled=false)
2. Vertex AI 要求原样重放 (thinking, signature) 对或完全不发送
3. 本地无法复制 Vertex 的加密验证逻辑

修复方案：
- 对 Claude 模型完全跳过所有 thinking 块（无论是否有 signature）
- 保持 Gemini 模型使用 dummy signature 的行为不变
- 更新测试用例以反映新的预期行为

影响：
- 消除 thinking 相关的 400 错误
- 与现有的 thinking 禁用策略保持一致
- 不影响 Gemini 模型的 thinking 功能

测试：
- ✅ TestBuildParts_ThinkingBlockWithoutSignature 全部通过
- ✅ TestBuildTools_CustomTypeTools 全部通过

参考：Codex review 建议

* fix(gateway): 修复 count_tokens 端点 400 错误

问题分析：
- count_tokens 请求包含 thinking 块时返回 400 错误
- 原因：thinking 块未被过滤，直接转发到上游 API
- 上游 API 拒绝无效的 thinking signature

根本原因：
1. /v1/messages 请求通过 TransformClaudeToGemini 过滤 thinking 块
2. count_tokens 请求绕过转换，直接转发原始请求体
3. 导致包含无效 signature 的 thinking 块被发送到上游

修复方案：
- 创建 FilterThinkingBlocks 工具函数
- 在 buildCountTokensRequest 中应用过滤（1 行修改）
- 与 /v1/messages 行为保持一致

实现细节：
- FilterThinkingBlocks: 解析 JSON，过滤 thinking 块，重新序列化
- 失败安全：解析/序列化失败时返回原始请求体
- 性能优化：仅在发现 thinking 块时重新序列化

测试：
- ✅ 6 个单元测试全部通过
- ✅ 覆盖正常过滤、无 thinking 块、无效 JSON 等场景
- ✅ 现有测试不受影响

影响：
- 消除 count_tokens 的 400 错误
- 不影响 Antigravity 账号（仍返回模拟响应）
- 适用于所有账号类型（OAuth、API Key）

文件修改：
- backend/internal/service/gateway_request.go: +62 行（新函数）
- backend/internal/service/gateway_service.go: +2 行（应用过滤）
- backend/internal/service/gateway_request_test.go: +62 行（测试）

* fix(gateway): 增强 thinking 块过滤逻辑

基于 Codex 分析和建议的改进：

问题分析：
- 新错误：signature: Field required（signature 字段缺失）
- 旧错误：Invalid signature（signature 存在但无效）
- 两者都说明 thinking 块在请求中是危险的

Codex 建议：
- 保持 Option A：完全跳过所有 thinking 块
- 原因：thinking 块应该是只输出的，除非有服务端来源证明
- 在无状态代理中，无法安全区分上游来源 vs 客户端注入

改进内容：

1. 增强 FilterThinkingBlocks 函数
   - 过滤显式的 thinking 块：{"type":"thinking", ...}
   - 过滤无 type 的 thinking 对象：{"thinking": {...}}
   - 保留 tool_use 等其他类型块中的 thinking 字段
   - 修复：只在实际过滤时更新 content 数组

2. 扩展过滤范围
   - 将 FilterThinkingBlocks 应用到 /v1/messages 主路径
   - 之前只应用于 count_tokens，现在两个端点都过滤
   - 防止所有端点的 thinking 相关 400 错误

3. 改进测试
   - 新增：过滤无 type discriminator 的 thinking 块
   - 新增：不过滤 tool_use 中的 thinking 字段
   - 使用 containsThinkingBlock 辅助函数验证

测试：
- ✅ 8 个测试用例全部通过
- ✅ 覆盖各种 thinking 块格式
- ✅ 确保不误伤其他类型的块

影响：
- 消除 signature required 和 invalid signature 错误
- 统一 /v1/messages 和 count_tokens 的行为
- 更健壮的 thinking 块检测逻辑

参考：Codex review 和代码改进

* refactor: 根据 Codex 审查建议进行代码优化

基于 Codex 代码审查的 P1 和 P2 改进：

P1 改进（重要问题）：

1. 优化日志输出
   - 移除 thinking 块跳过时的 log.Printf
   - 避免高频请求下的日志噪音
   - 添加注释说明可通过指标监控

2. 清理遗留代码
   - 删除未使用的 isValidThoughtSignature 函数（27行）
   - 该函数在改为完全跳过 thinking 块后不再需要

P2 改进（性能优化）：

3. 添加快速路径检查
   - 在 FilterThinkingBlocks 中添加 bytes.Contains 预检查
   - 如果请求体不包含 "thinking" 字符串，直接返回
   - 避免不必要的 JSON 解析，提升性能

技术细节：
- request_transformer.go: -27行（删除函数），+1行（优化注释）
- gateway_request.go: +5行（快速路径 + bytes 导入）

测试：
- ✅ TestBuildParts_ThinkingBlockWithoutSignature 全部通过
- ✅ TestFilterThinkingBlocks 全部通过（8个测试用例）

影响：
- 减少日志噪音
- 提升性能（快速路径）
- 代码更简洁（删除未使用代码）

参考：Codex 代码审查建议

* fix: 修复 golangci-lint 检查问题

- 格式化 gateway_request_test.go
- 使用 switch 语句替代 if-else 链（staticcheck QF1003）

* fix(antigravity): 修复 thinking signature 处理并实现 Auto 模式降级

问题分析：
1. 原先代码错误地禁用了 Claude via Vertex 的 thinkingConfig
2. 历史 thinking 块的 signature 被完全跳过，导致验证失败
3. 跨模型混用时 dummy signature 会导致 400 错误

修复内容：

**request_transformer.go**：
- 删除第 38-43 行的错误逻辑（禁用 thinkingConfig）
- 引入 thoughtSignatureMode（Preserve/Dummy）策略
- Claude 模式：透传真实 signature，过滤空/dummy
- Gemini 模式：使用 dummy signature
- 支持 signature-only thinking 块
- tool_use 的 signature 也透传

**antigravity_gateway_service.go**：
- 新增 isSignatureRelatedError() 检测 signature 相关错误
- 新增 stripThinkingFromClaudeRequest() 移除 thinking 块
- 实现 Auto 模式：检测 400 + signature 关键词时自动降级重试
- 重试时完全移除 thinking 配置和消息中的 thinking 块
- 最多重试一次，避免循环

**测试**：
- 更新并新增测试覆盖 Claude preserve/Gemini dummy 模式
- 新增 tool_use signature 处理测试
- 所有测试通过（6/6）

影响：
- ✅ Claude via Vertex 可以正常使用 thinking 功能
- ✅ 历史 signature 正确透传，避免验证失败
- ✅ 跨模型混用时自动过滤无效 signature
- ✅ 错误驱动降级，自动修复 signature 问题
- ✅ 不影响纯 Claude API 和其他渠道

参考：Codex 深度分析和实现建议

* fix(lint): 修复 gofmt 格式问题

* fix(antigravity): 修复 stripThinkingFromClaudeRequest 遗漏 untyped thinking blocks

问题：
- Codex 审查指出 stripThinkingFromClaudeRequest 只移除了 type="thinking" 的块
- 没有处理没有 type 字段的 thinking 对象（如 {"thinking": "...", "signature": "..."}）
- 导致重试时仍包含无效 thinking 块，上游 400 错误持续

修复：
- 添加检查：跳过没有 type 但有 thinking 字段的块
- 现在会移除两种格式：
  1. {"type": "thinking", "thinking": "...", "signature": "..."}
  2. {"thinking": "...", "signature": "..."}（untyped）

测试：所有测试通过

参考：Codex P1 审查意见
											
										
										
											2026-01-02 17:47:49 +08:00
+													filtered = true
 													filteredThisMessage = true
-												perf(gateway): 优化负载感知调度

主要改进：
- 优化负载感知调度的准确性和响应速度
- 将 AccountUsageService 的包级缓存改为依赖注入
- 修复 SSE/JSON 转义和 nil 安全问题
- 恢复 Google One 功能兼容性

											
										
										
											2026-01-03 06:32:51 -08:00
+													continue
-												Fix/multiple issues (#24)

* fix(gemini): 修复 google_one OAuth 配置和 scopes 问题

- 修复 google_one 类型在 ExchangeCode 和 RefreshToken 中使用内置客户端
- 添加 DefaultGoogleOneScopes，包含 generative-language 和 drive.readonly 权限
- 在 EffectiveOAuthConfig 中为 google_one 类型使用专门的 scopes
- 将 docker-compose.override.yml 重命名为 .example 并添加到 .gitignore
- 完善 docker-compose.override.yml.example 示例文档

解决问题：
1. google_one OAuth 授权后 API 调用返回 403 权限不足
2. 缺少访问 Gemini API 所需的 generative-language scope
3. 缺少获取 Drive 存储配额所需的 drive.readonly scope

* fix(antigravity): 完全跳过 Claude 模型的所有 thinking 块

问题分析：
- 当前代码尝试保留有 signature 的 thinking 块
- 但 Vertex AI 的 signature 是完整性令牌，无法在本地验证
- 导致 400 错误：Invalid signature in thinking block

根本原因：
1. thinking 功能已对非 Gemini 模型禁用 (isThinkingEnabled=false)
2. Vertex AI 要求原样重放 (thinking, signature) 对或完全不发送
3. 本地无法复制 Vertex 的加密验证逻辑

修复方案：
- 对 Claude 模型完全跳过所有 thinking 块（无论是否有 signature）
- 保持 Gemini 模型使用 dummy signature 的行为不变
- 更新测试用例以反映新的预期行为

影响：
- 消除 thinking 相关的 400 错误
- 与现有的 thinking 禁用策略保持一致
- 不影响 Gemini 模型的 thinking 功能

测试：
- ✅ TestBuildParts_ThinkingBlockWithoutSignature 全部通过
- ✅ TestBuildTools_CustomTypeTools 全部通过

参考：Codex review 建议

* fix(gateway): 修复 count_tokens 端点 400 错误

问题分析：
- count_tokens 请求包含 thinking 块时返回 400 错误
- 原因：thinking 块未被过滤，直接转发到上游 API
- 上游 API 拒绝无效的 thinking signature

根本原因：
1. /v1/messages 请求通过 TransformClaudeToGemini 过滤 thinking 块
2. count_tokens 请求绕过转换，直接转发原始请求体
3. 导致包含无效 signature 的 thinking 块被发送到上游

修复方案：
- 创建 FilterThinkingBlocks 工具函数
- 在 buildCountTokensRequest 中应用过滤（1 行修改）
- 与 /v1/messages 行为保持一致

实现细节：
- FilterThinkingBlocks: 解析 JSON，过滤 thinking 块，重新序列化
- 失败安全：解析/序列化失败时返回原始请求体
- 性能优化：仅在发现 thinking 块时重新序列化

测试：
- ✅ 6 个单元测试全部通过
- ✅ 覆盖正常过滤、无 thinking 块、无效 JSON 等场景
- ✅ 现有测试不受影响

影响：
- 消除 count_tokens 的 400 错误
- 不影响 Antigravity 账号（仍返回模拟响应）
- 适用于所有账号类型（OAuth、API Key）

文件修改：
- backend/internal/service/gateway_request.go: +62 行（新函数）
- backend/internal/service/gateway_service.go: +2 行（应用过滤）
- backend/internal/service/gateway_request_test.go: +62 行（测试）

* fix(gateway): 增强 thinking 块过滤逻辑

基于 Codex 分析和建议的改进：

问题分析：
- 新错误：signature: Field required（signature 字段缺失）
- 旧错误：Invalid signature（signature 存在但无效）
- 两者都说明 thinking 块在请求中是危险的

Codex 建议：
- 保持 Option A：完全跳过所有 thinking 块
- 原因：thinking 块应该是只输出的，除非有服务端来源证明
- 在无状态代理中，无法安全区分上游来源 vs 客户端注入

改进内容：

1. 增强 FilterThinkingBlocks 函数
   - 过滤显式的 thinking 块：{"type":"thinking", ...}
   - 过滤无 type 的 thinking 对象：{"thinking": {...}}
   - 保留 tool_use 等其他类型块中的 thinking 字段
   - 修复：只在实际过滤时更新 content 数组

2. 扩展过滤范围
   - 将 FilterThinkingBlocks 应用到 /v1/messages 主路径
   - 之前只应用于 count_tokens，现在两个端点都过滤
   - 防止所有端点的 thinking 相关 400 错误

3. 改进测试
   - 新增：过滤无 type discriminator 的 thinking 块
   - 新增：不过滤 tool_use 中的 thinking 字段
   - 使用 containsThinkingBlock 辅助函数验证

测试：
- ✅ 8 个测试用例全部通过
- ✅ 覆盖各种 thinking 块格式
- ✅ 确保不误伤其他类型的块

影响：
- 消除 signature required 和 invalid signature 错误
- 统一 /v1/messages 和 count_tokens 的行为
- 更健壮的 thinking 块检测逻辑

参考：Codex review 和代码改进

* refactor: 根据 Codex 审查建议进行代码优化

基于 Codex 代码审查的 P1 和 P2 改进：

P1 改进（重要问题）：

1. 优化日志输出
   - 移除 thinking 块跳过时的 log.Printf
   - 避免高频请求下的日志噪音
   - 添加注释说明可通过指标监控

2. 清理遗留代码
   - 删除未使用的 isValidThoughtSignature 函数（27行）
   - 该函数在改为完全跳过 thinking 块后不再需要

P2 改进（性能优化）：

3. 添加快速路径检查
   - 在 FilterThinkingBlocks 中添加 bytes.Contains 预检查
   - 如果请求体不包含 "thinking" 字符串，直接返回
   - 避免不必要的 JSON 解析，提升性能

技术细节：
- request_transformer.go: -27行（删除函数），+1行（优化注释）
- gateway_request.go: +5行（快速路径 + bytes 导入）

测试：
- ✅ TestBuildParts_ThinkingBlockWithoutSignature 全部通过
- ✅ TestFilterThinkingBlocks 全部通过（8个测试用例）

影响：
- 减少日志噪音
- 提升性能（快速路径）
- 代码更简洁（删除未使用代码）

参考：Codex 代码审查建议

* fix: 修复 golangci-lint 检查问题

- 格式化 gateway_request_test.go
- 使用 switch 语句替代 if-else 链（staticcheck QF1003）

* fix(antigravity): 修复 thinking signature 处理并实现 Auto 模式降级

问题分析：
1. 原先代码错误地禁用了 Claude via Vertex 的 thinkingConfig
2. 历史 thinking 块的 signature 被完全跳过，导致验证失败
3. 跨模型混用时 dummy signature 会导致 400 错误

修复内容：

**request_transformer.go**：
- 删除第 38-43 行的错误逻辑（禁用 thinkingConfig）
- 引入 thoughtSignatureMode（Preserve/Dummy）策略
- Claude 模式：透传真实 signature，过滤空/dummy
- Gemini 模式：使用 dummy signature
- 支持 signature-only thinking 块
- tool_use 的 signature 也透传

**antigravity_gateway_service.go**：
- 新增 isSignatureRelatedError() 检测 signature 相关错误
- 新增 stripThinkingFromClaudeRequest() 移除 thinking 块
- 实现 Auto 模式：检测 400 + signature 关键词时自动降级重试
- 重试时完全移除 thinking 配置和消息中的 thinking 块
- 最多重试一次，避免循环

**测试**：
- 更新并新增测试覆盖 Claude preserve/Gemini dummy 模式
- 新增 tool_use signature 处理测试
- 所有测试通过（6/6）

影响：
- ✅ Claude via Vertex 可以正常使用 thinking 功能
- ✅ 历史 signature 正确透传，避免验证失败
- ✅ 跨模型混用时自动过滤无效 signature
- ✅ 错误驱动降级，自动修复 signature 问题
- ✅ 不影响纯 Claude API 和其他渠道

参考：Codex 深度分析和实现建议

* fix(lint): 修复 gofmt 格式问题

* fix(antigravity): 修复 stripThinkingFromClaudeRequest 遗漏 untyped thinking blocks

问题：
- Codex 审查指出 stripThinkingFromClaudeRequest 只移除了 type="thinking" 的块
- 没有处理没有 type 字段的 thinking 对象（如 {"thinking": "...", "signature": "..."}）
- 导致重试时仍包含无效 thinking 块，上游 400 错误持续

修复：
- 添加检查：跳过没有 type 但有 thinking 字段的块
- 现在会移除两种格式：
  1. {"type": "thinking", "thinking": "...", "signature": "..."}
  2. {"thinking": "...", "signature": "..."}（untyped）

测试：所有测试通过

参考：Codex P1 审查意见
											
										
										
											2026-01-02 17:47:49 +08:00
+												}
 											}
 											newContent = append(newContent, block)
 										}
 										if filteredThisMessage {
 											msgMap["content"] = newContent
 										}
 									}
 									if !filtered {
-												fix(thinking): 优化 thinking block 签名错误重试逻辑

- FilterThinkingBlocksForRetry: 将 thinking block 转换为 text block 而非直接删除
- stripThinkingFromClaudeRequest: Antigravity 网关同步采用转换策略
- 统一处理 thinking/redacted_thinking/无 type 字段的 thinking block
- 保留 thinking 内容，避免上下文丢失

											
										
										
											2026-01-03 17:07:54 -08:00
+										return body
-												Fix/multiple issues (#24)

* fix(gemini): 修复 google_one OAuth 配置和 scopes 问题

- 修复 google_one 类型在 ExchangeCode 和 RefreshToken 中使用内置客户端
- 添加 DefaultGoogleOneScopes，包含 generative-language 和 drive.readonly 权限
- 在 EffectiveOAuthConfig 中为 google_one 类型使用专门的 scopes
- 将 docker-compose.override.yml 重命名为 .example 并添加到 .gitignore
- 完善 docker-compose.override.yml.example 示例文档

解决问题：
1. google_one OAuth 授权后 API 调用返回 403 权限不足
2. 缺少访问 Gemini API 所需的 generative-language scope
3. 缺少获取 Drive 存储配额所需的 drive.readonly scope

* fix(antigravity): 完全跳过 Claude 模型的所有 thinking 块

问题分析：
- 当前代码尝试保留有 signature 的 thinking 块
- 但 Vertex AI 的 signature 是完整性令牌，无法在本地验证
- 导致 400 错误：Invalid signature in thinking block

根本原因：
1. thinking 功能已对非 Gemini 模型禁用 (isThinkingEnabled=false)
2. Vertex AI 要求原样重放 (thinking, signature) 对或完全不发送
3. 本地无法复制 Vertex 的加密验证逻辑

修复方案：
- 对 Claude 模型完全跳过所有 thinking 块（无论是否有 signature）
- 保持 Gemini 模型使用 dummy signature 的行为不变
- 更新测试用例以反映新的预期行为

影响：
- 消除 thinking 相关的 400 错误
- 与现有的 thinking 禁用策略保持一致
- 不影响 Gemini 模型的 thinking 功能

测试：
- ✅ TestBuildParts_ThinkingBlockWithoutSignature 全部通过
- ✅ TestBuildTools_CustomTypeTools 全部通过

参考：Codex review 建议

* fix(gateway): 修复 count_tokens 端点 400 错误

问题分析：
- count_tokens 请求包含 thinking 块时返回 400 错误
- 原因：thinking 块未被过滤，直接转发到上游 API
- 上游 API 拒绝无效的 thinking signature

根本原因：
1. /v1/messages 请求通过 TransformClaudeToGemini 过滤 thinking 块
2. count_tokens 请求绕过转换，直接转发原始请求体
3. 导致包含无效 signature 的 thinking 块被发送到上游

修复方案：
- 创建 FilterThinkingBlocks 工具函数
- 在 buildCountTokensRequest 中应用过滤（1 行修改）
- 与 /v1/messages 行为保持一致

实现细节：
- FilterThinkingBlocks: 解析 JSON，过滤 thinking 块，重新序列化
- 失败安全：解析/序列化失败时返回原始请求体
- 性能优化：仅在发现 thinking 块时重新序列化

测试：
- ✅ 6 个单元测试全部通过
- ✅ 覆盖正常过滤、无 thinking 块、无效 JSON 等场景
- ✅ 现有测试不受影响

影响：
- 消除 count_tokens 的 400 错误
- 不影响 Antigravity 账号（仍返回模拟响应）
- 适用于所有账号类型（OAuth、API Key）

文件修改：
- backend/internal/service/gateway_request.go: +62 行（新函数）
- backend/internal/service/gateway_service.go: +2 行（应用过滤）
- backend/internal/service/gateway_request_test.go: +62 行（测试）

* fix(gateway): 增强 thinking 块过滤逻辑

基于 Codex 分析和建议的改进：

问题分析：
- 新错误：signature: Field required（signature 字段缺失）
- 旧错误：Invalid signature（signature 存在但无效）
- 两者都说明 thinking 块在请求中是危险的

Codex 建议：
- 保持 Option A：完全跳过所有 thinking 块
- 原因：thinking 块应该是只输出的，除非有服务端来源证明
- 在无状态代理中，无法安全区分上游来源 vs 客户端注入

改进内容：

1. 增强 FilterThinkingBlocks 函数
   - 过滤显式的 thinking 块：{"type":"thinking", ...}
   - 过滤无 type 的 thinking 对象：{"thinking": {...}}
   - 保留 tool_use 等其他类型块中的 thinking 字段
   - 修复：只在实际过滤时更新 content 数组

2. 扩展过滤范围
   - 将 FilterThinkingBlocks 应用到 /v1/messages 主路径
   - 之前只应用于 count_tokens，现在两个端点都过滤
   - 防止所有端点的 thinking 相关 400 错误

3. 改进测试
   - 新增：过滤无 type discriminator 的 thinking 块
   - 新增：不过滤 tool_use 中的 thinking 字段
   - 使用 containsThinkingBlock 辅助函数验证

测试：
- ✅ 8 个测试用例全部通过
- ✅ 覆盖各种 thinking 块格式
- ✅ 确保不误伤其他类型的块

影响：
- 消除 signature required 和 invalid signature 错误
- 统一 /v1/messages 和 count_tokens 的行为
- 更健壮的 thinking 块检测逻辑

参考：Codex review 和代码改进

* refactor: 根据 Codex 审查建议进行代码优化

基于 Codex 代码审查的 P1 和 P2 改进：

P1 改进（重要问题）：

1. 优化日志输出
   - 移除 thinking 块跳过时的 log.Printf
   - 避免高频请求下的日志噪音
   - 添加注释说明可通过指标监控

2. 清理遗留代码
   - 删除未使用的 isValidThoughtSignature 函数（27行）
   - 该函数在改为完全跳过 thinking 块后不再需要

P2 改进（性能优化）：

3. 添加快速路径检查
   - 在 FilterThinkingBlocks 中添加 bytes.Contains 预检查
   - 如果请求体不包含 "thinking" 字符串，直接返回
   - 避免不必要的 JSON 解析，提升性能

技术细节：
- request_transformer.go: -27行（删除函数），+1行（优化注释）
- gateway_request.go: +5行（快速路径 + bytes 导入）

测试：
- ✅ TestBuildParts_ThinkingBlockWithoutSignature 全部通过
- ✅ TestFilterThinkingBlocks 全部通过（8个测试用例）

影响：
- 减少日志噪音
- 提升性能（快速路径）
- 代码更简洁（删除未使用代码）

参考：Codex 代码审查建议

* fix: 修复 golangci-lint 检查问题

- 格式化 gateway_request_test.go
- 使用 switch 语句替代 if-else 链（staticcheck QF1003）

* fix(antigravity): 修复 thinking signature 处理并实现 Auto 模式降级

问题分析：
1. 原先代码错误地禁用了 Claude via Vertex 的 thinkingConfig
2. 历史 thinking 块的 signature 被完全跳过，导致验证失败
3. 跨模型混用时 dummy signature 会导致 400 错误

修复内容：

**request_transformer.go**：
- 删除第 38-43 行的错误逻辑（禁用 thinkingConfig）
- 引入 thoughtSignatureMode（Preserve/Dummy）策略
- Claude 模式：透传真实 signature，过滤空/dummy
- Gemini 模式：使用 dummy signature
- 支持 signature-only thinking 块
- tool_use 的 signature 也透传

**antigravity_gateway_service.go**：
- 新增 isSignatureRelatedError() 检测 signature 相关错误
- 新增 stripThinkingFromClaudeRequest() 移除 thinking 块
- 实现 Auto 模式：检测 400 + signature 关键词时自动降级重试
- 重试时完全移除 thinking 配置和消息中的 thinking 块
- 最多重试一次，避免循环

**测试**：
- 更新并新增测试覆盖 Claude preserve/Gemini dummy 模式
- 新增 tool_use signature 处理测试
- 所有测试通过（6/6）

影响：
- ✅ Claude via Vertex 可以正常使用 thinking 功能
- ✅ 历史 signature 正确透传，避免验证失败
- ✅ 跨模型混用时自动过滤无效 signature
- ✅ 错误驱动降级，自动修复 signature 问题
- ✅ 不影响纯 Claude API 和其他渠道

参考：Codex 深度分析和实现建议

* fix(lint): 修复 gofmt 格式问题

* fix(antigravity): 修复 stripThinkingFromClaudeRequest 遗漏 untyped thinking blocks

问题：
- Codex 审查指出 stripThinkingFromClaudeRequest 只移除了 type="thinking" 的块
- 没有处理没有 type 字段的 thinking 对象（如 {"thinking": "...", "signature": "..."}）
- 导致重试时仍包含无效 thinking 块，上游 400 错误持续

修复：
- 添加检查：跳过没有 type 但有 thinking 字段的块
- 现在会移除两种格式：
  1. {"type": "thinking", "thinking": "...", "signature": "..."}
  2. {"thinking": "...", "signature": "..."}（untyped）

测试：所有测试通过

参考：Codex P1 审查意见
											
										
										
											2026-01-02 17:47:49 +08:00
+									}
 									newBody, err := json.Marshal(req)
 									if err != nil {
-												fix(thinking): 优化 thinking block 签名错误重试逻辑

- FilterThinkingBlocksForRetry: 将 thinking block 转换为 text block 而非直接删除
- stripThinkingFromClaudeRequest: Antigravity 网关同步采用转换策略
- 统一处理 thinking/redacted_thinking/无 type 字段的 thinking block
- 保留 thinking 内容，避免上下文丢失

											
										
										
											2026-01-03 17:07:54 -08:00
+										return body
-												Fix/multiple issues (#24)

* fix(gemini): 修复 google_one OAuth 配置和 scopes 问题

- 修复 google_one 类型在 ExchangeCode 和 RefreshToken 中使用内置客户端
- 添加 DefaultGoogleOneScopes，包含 generative-language 和 drive.readonly 权限
- 在 EffectiveOAuthConfig 中为 google_one 类型使用专门的 scopes
- 将 docker-compose.override.yml 重命名为 .example 并添加到 .gitignore
- 完善 docker-compose.override.yml.example 示例文档

解决问题：
1. google_one OAuth 授权后 API 调用返回 403 权限不足
2. 缺少访问 Gemini API 所需的 generative-language scope
3. 缺少获取 Drive 存储配额所需的 drive.readonly scope

* fix(antigravity): 完全跳过 Claude 模型的所有 thinking 块

问题分析：
- 当前代码尝试保留有 signature 的 thinking 块
- 但 Vertex AI 的 signature 是完整性令牌，无法在本地验证
- 导致 400 错误：Invalid signature in thinking block

根本原因：
1. thinking 功能已对非 Gemini 模型禁用 (isThinkingEnabled=false)
2. Vertex AI 要求原样重放 (thinking, signature) 对或完全不发送
3. 本地无法复制 Vertex 的加密验证逻辑

修复方案：
- 对 Claude 模型完全跳过所有 thinking 块（无论是否有 signature）
- 保持 Gemini 模型使用 dummy signature 的行为不变
- 更新测试用例以反映新的预期行为

影响：
- 消除 thinking 相关的 400 错误
- 与现有的 thinking 禁用策略保持一致
- 不影响 Gemini 模型的 thinking 功能

测试：
- ✅ TestBuildParts_ThinkingBlockWithoutSignature 全部通过
- ✅ TestBuildTools_CustomTypeTools 全部通过

参考：Codex review 建议

* fix(gateway): 修复 count_tokens 端点 400 错误

问题分析：
- count_tokens 请求包含 thinking 块时返回 400 错误
- 原因：thinking 块未被过滤，直接转发到上游 API
- 上游 API 拒绝无效的 thinking signature

根本原因：
1. /v1/messages 请求通过 TransformClaudeToGemini 过滤 thinking 块
2. count_tokens 请求绕过转换，直接转发原始请求体
3. 导致包含无效 signature 的 thinking 块被发送到上游

修复方案：
- 创建 FilterThinkingBlocks 工具函数
- 在 buildCountTokensRequest 中应用过滤（1 行修改）
- 与 /v1/messages 行为保持一致

实现细节：
- FilterThinkingBlocks: 解析 JSON，过滤 thinking 块，重新序列化
- 失败安全：解析/序列化失败时返回原始请求体
- 性能优化：仅在发现 thinking 块时重新序列化

测试：
- ✅ 6 个单元测试全部通过
- ✅ 覆盖正常过滤、无 thinking 块、无效 JSON 等场景
- ✅ 现有测试不受影响

影响：
- 消除 count_tokens 的 400 错误
- 不影响 Antigravity 账号（仍返回模拟响应）
- 适用于所有账号类型（OAuth、API Key）

文件修改：
- backend/internal/service/gateway_request.go: +62 行（新函数）
- backend/internal/service/gateway_service.go: +2 行（应用过滤）
- backend/internal/service/gateway_request_test.go: +62 行（测试）

* fix(gateway): 增强 thinking 块过滤逻辑

基于 Codex 分析和建议的改进：

问题分析：
- 新错误：signature: Field required（signature 字段缺失）
- 旧错误：Invalid signature（signature 存在但无效）
- 两者都说明 thinking 块在请求中是危险的

Codex 建议：
- 保持 Option A：完全跳过所有 thinking 块
- 原因：thinking 块应该是只输出的，除非有服务端来源证明
- 在无状态代理中，无法安全区分上游来源 vs 客户端注入

改进内容：

1. 增强 FilterThinkingBlocks 函数
   - 过滤显式的 thinking 块：{"type":"thinking", ...}
   - 过滤无 type 的 thinking 对象：{"thinking": {...}}
   - 保留 tool_use 等其他类型块中的 thinking 字段
   - 修复：只在实际过滤时更新 content 数组

2. 扩展过滤范围
   - 将 FilterThinkingBlocks 应用到 /v1/messages 主路径
   - 之前只应用于 count_tokens，现在两个端点都过滤
   - 防止所有端点的 thinking 相关 400 错误

3. 改进测试
   - 新增：过滤无 type discriminator 的 thinking 块
   - 新增：不过滤 tool_use 中的 thinking 字段
   - 使用 containsThinkingBlock 辅助函数验证

测试：
- ✅ 8 个测试用例全部通过
- ✅ 覆盖各种 thinking 块格式
- ✅ 确保不误伤其他类型的块

影响：
- 消除 signature required 和 invalid signature 错误
- 统一 /v1/messages 和 count_tokens 的行为
- 更健壮的 thinking 块检测逻辑

参考：Codex review 和代码改进

* refactor: 根据 Codex 审查建议进行代码优化

基于 Codex 代码审查的 P1 和 P2 改进：

P1 改进（重要问题）：

1. 优化日志输出
   - 移除 thinking 块跳过时的 log.Printf
   - 避免高频请求下的日志噪音
   - 添加注释说明可通过指标监控

2. 清理遗留代码
   - 删除未使用的 isValidThoughtSignature 函数（27行）
   - 该函数在改为完全跳过 thinking 块后不再需要

P2 改进（性能优化）：

3. 添加快速路径检查
   - 在 FilterThinkingBlocks 中添加 bytes.Contains 预检查
   - 如果请求体不包含 "thinking" 字符串，直接返回
   - 避免不必要的 JSON 解析，提升性能

技术细节：
- request_transformer.go: -27行（删除函数），+1行（优化注释）
- gateway_request.go: +5行（快速路径 + bytes 导入）

测试：
- ✅ TestBuildParts_ThinkingBlockWithoutSignature 全部通过
- ✅ TestFilterThinkingBlocks 全部通过（8个测试用例）

影响：
- 减少日志噪音
- 提升性能（快速路径）
- 代码更简洁（删除未使用代码）

参考：Codex 代码审查建议

* fix: 修复 golangci-lint 检查问题

- 格式化 gateway_request_test.go
- 使用 switch 语句替代 if-else 链（staticcheck QF1003）

* fix(antigravity): 修复 thinking signature 处理并实现 Auto 模式降级

问题分析：
1. 原先代码错误地禁用了 Claude via Vertex 的 thinkingConfig
2. 历史 thinking 块的 signature 被完全跳过，导致验证失败
3. 跨模型混用时 dummy signature 会导致 400 错误

修复内容：

**request_transformer.go**：
- 删除第 38-43 行的错误逻辑（禁用 thinkingConfig）
- 引入 thoughtSignatureMode（Preserve/Dummy）策略
- Claude 模式：透传真实 signature，过滤空/dummy
- Gemini 模式：使用 dummy signature
- 支持 signature-only thinking 块
- tool_use 的 signature 也透传

**antigravity_gateway_service.go**：
- 新增 isSignatureRelatedError() 检测 signature 相关错误
- 新增 stripThinkingFromClaudeRequest() 移除 thinking 块
- 实现 Auto 模式：检测 400 + signature 关键词时自动降级重试
- 重试时完全移除 thinking 配置和消息中的 thinking 块
- 最多重试一次，避免循环

**测试**：
- 更新并新增测试覆盖 Claude preserve/Gemini dummy 模式
- 新增 tool_use signature 处理测试
- 所有测试通过（6/6）

影响：
- ✅ Claude via Vertex 可以正常使用 thinking 功能
- ✅ 历史 signature 正确透传，避免验证失败
- ✅ 跨模型混用时自动过滤无效 signature
- ✅ 错误驱动降级，自动修复 signature 问题
- ✅ 不影响纯 Claude API 和其他渠道

参考：Codex 深度分析和实现建议

* fix(lint): 修复 gofmt 格式问题

* fix(antigravity): 修复 stripThinkingFromClaudeRequest 遗漏 untyped thinking blocks

问题：
- Codex 审查指出 stripThinkingFromClaudeRequest 只移除了 type="thinking" 的块
- 没有处理没有 type 字段的 thinking 对象（如 {"thinking": "...", "signature": "..."}）
- 导致重试时仍包含无效 thinking 块，上游 400 错误持续

修复：
- 添加检查：跳过没有 type 但有 thinking 字段的块
- 现在会移除两种格式：
  1. {"type": "thinking", "thinking": "...", "signature": "..."}
  2. {"thinking": "...", "signature": "..."}（untyped）

测试：所有测试通过

参考：Codex P1 审查意见
											
										
										
											2026-01-02 17:47:49 +08:00
+									}
 									return newBody
 								}
-												feat: 支持后台设置是否启用整流开关

											
										
										
											2026-03-07 21:45:18 +08:00
 								// =========================
 								// Thinking Budget Rectifier
 								// =========================
 								const (
 									// BudgetRectifyBudgetTokens is the value RectifyThinkingBudget writes to
 									// thinking.budget_tokens.
 									BudgetRectifyBudgetTokens = 32000
 									// BudgetRectifyMaxTokens is the value RectifyThinkingBudget writes to max_tokens
 									// when the request's current max_tokens is below BudgetRectifyMinMaxTokens.
 									BudgetRectifyMaxTokens = 64000
 									// BudgetRectifyMinMaxTokens is the smallest max_tokens that RectifyThinkingBudget
 									// leaves unchanged. It is one more than BudgetRectifyBudgetTokens, so that
 									// max_tokens stays strictly greater than budget_tokens.
 									BudgetRectifyMinMaxTokens = 32001
 								)
 								// isThinkingBudgetConstraintError reports whether an upstream error message looks like
 								// a thinking budget_tokens constraint violation (e.g. "budget_tokens >= 1024").
 								//
 								// The match is case-insensitive and requires all of the following:
 								//   - the message mentions "budget_tokens" or "budget tokens";
 								//   - the message mentions "thinking";
 								//   - the message contains ">= 1024", or "greater than or equal to 1024",
 								//     or both "1024" and "input should be".
 								func isThinkingBudgetConstraintError(errMsg string) bool {
 									lowered := strings.ToLower(errMsg)
 									mentionsBudget := strings.Contains(lowered, "budget_tokens") || strings.Contains(lowered, "budget tokens")
 									if !mentionsBudget || !strings.Contains(lowered, "thinking") {
 										return false
 									}
 									// Any one of the constraint phrasings is enough once the guards above passed.
 									switch {
 									case strings.Contains(lowered, ">= 1024"):
 										return true
 									case strings.Contains(lowered, "greater than or equal to 1024"):
 										return true
 									default:
 										return strings.Contains(lowered, "1024") && strings.Contains(lowered, "input should be")
 									}
 								}
 								// RectifyThinkingBudget rewrites a request body so that it no longer trips the
 								// budget_tokens constraint. Unless thinking.type is "adaptive" (in which case the body
 								// is returned untouched), it:
 								//   - sets thinking.type to "enabled" when it is anything else;
 								//   - sets thinking.budget_tokens to BudgetRectifyBudgetTokens when it differs;
 								//   - sets max_tokens to BudgetRectifyMaxTokens when it is below BudgetRectifyMinMaxTokens.
 								//
 								// It returns the resulting body and true if at least one write succeeded, or the
 								// original body and false otherwise.
 								func RectifyThinkingBudget(body []byte) ([]byte, bool) {
 									originalType := gjson.GetBytes(body, "thinking.type").String()
 									// Adaptive thinking is never rectified.
 									if originalType == "adaptive" {
 										return body, false
 									}
 									out, dirty := body, false
 									// put writes value at path; a failed write leaves out and dirty as they were.
 									put := func(path string, value any) {
 										updated, err := sjson.SetBytes(out, path, value)
 										if err != nil {
 											return
 										}
 										out, dirty = updated, true
 									}
 									if originalType != "enabled" {
 										put("thinking.type", "enabled")
 									}
 									// Later checks read from out so they see the effect of earlier writes.
 									if gjson.GetBytes(out, "thinking.budget_tokens").Int() != BudgetRectifyBudgetTokens {
 										put("thinking.budget_tokens", BudgetRectifyBudgetTokens)
 									}
 									if gjson.GetBytes(out, "max_tokens").Int() < int64(BudgetRectifyMinMaxTokens) {
 										put("max_tokens", BudgetRectifyMaxTokens)
 									}
 									return out, dirty
 								}