mirror of
https://gitee.com/wanwujie/sub2api
synced 2026-04-18 13:54:46 +08:00
Merge pull request #875 from mt21625457/fix/openai-fast-billing-clean
fix(billing): 修复 OpenAI fast 档位计费并补齐展示
This commit is contained in:
@@ -496,6 +496,7 @@ func usageLogFromServiceUser(l *service.UsageLog) UsageLog {
|
|||||||
AccountID: l.AccountID,
|
AccountID: l.AccountID,
|
||||||
RequestID: l.RequestID,
|
RequestID: l.RequestID,
|
||||||
Model: l.Model,
|
Model: l.Model,
|
||||||
|
ServiceTier: l.ServiceTier,
|
||||||
ReasoningEffort: l.ReasoningEffort,
|
ReasoningEffort: l.ReasoningEffort,
|
||||||
GroupID: l.GroupID,
|
GroupID: l.GroupID,
|
||||||
SubscriptionID: l.SubscriptionID,
|
SubscriptionID: l.SubscriptionID,
|
||||||
|
|||||||
@@ -71,3 +71,29 @@ func TestRequestTypeStringPtrNil(t *testing.T) {
|
|||||||
t.Parallel()
|
t.Parallel()
|
||||||
require.Nil(t, requestTypeStringPtr(nil))
|
require.Nil(t, requestTypeStringPtr(nil))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestUsageLogFromService_IncludesServiceTierForUserAndAdmin(t *testing.T) {
|
||||||
|
t.Parallel()
|
||||||
|
|
||||||
|
serviceTier := "priority"
|
||||||
|
log := &service.UsageLog{
|
||||||
|
RequestID: "req_3",
|
||||||
|
Model: "gpt-5.4",
|
||||||
|
ServiceTier: &serviceTier,
|
||||||
|
AccountRateMultiplier: f64Ptr(1.5),
|
||||||
|
}
|
||||||
|
|
||||||
|
userDTO := UsageLogFromService(log)
|
||||||
|
adminDTO := UsageLogFromServiceAdmin(log)
|
||||||
|
|
||||||
|
require.NotNil(t, userDTO.ServiceTier)
|
||||||
|
require.Equal(t, serviceTier, *userDTO.ServiceTier)
|
||||||
|
require.NotNil(t, adminDTO.ServiceTier)
|
||||||
|
require.Equal(t, serviceTier, *adminDTO.ServiceTier)
|
||||||
|
require.NotNil(t, adminDTO.AccountRateMultiplier)
|
||||||
|
require.InDelta(t, 1.5, *adminDTO.AccountRateMultiplier, 1e-12)
|
||||||
|
}
|
||||||
|
|
||||||
|
func f64Ptr(value float64) *float64 {
|
||||||
|
return &value
|
||||||
|
}
|
||||||
|
|||||||
@@ -322,6 +322,8 @@ type UsageLog struct {
|
|||||||
AccountID int64 `json:"account_id"`
|
AccountID int64 `json:"account_id"`
|
||||||
RequestID string `json:"request_id"`
|
RequestID string `json:"request_id"`
|
||||||
Model string `json:"model"`
|
Model string `json:"model"`
|
||||||
|
// ServiceTier records the OpenAI service tier used for billing, e.g. "priority" / "flex".
|
||||||
|
ServiceTier *string `json:"service_tier,omitempty"`
|
||||||
// ReasoningEffort is the request's reasoning effort level (OpenAI Responses API).
|
// ReasoningEffort is the request's reasoning effort level (OpenAI Responses API).
|
||||||
// nil means not provided / not applicable.
|
// nil means not provided / not applicable.
|
||||||
ReasoningEffort *string `json:"reasoning_effort,omitempty"`
|
ReasoningEffort *string `json:"reasoning_effort,omitempty"`
|
||||||
|
|||||||
@@ -22,7 +22,7 @@ import (
|
|||||||
"github.com/lib/pq"
|
"github.com/lib/pq"
|
||||||
)
|
)
|
||||||
|
|
||||||
const usageLogSelectColumns = "id, user_id, api_key_id, account_id, request_id, model, group_id, subscription_id, input_tokens, output_tokens, cache_creation_tokens, cache_read_tokens, cache_creation_5m_tokens, cache_creation_1h_tokens, input_cost, output_cost, cache_creation_cost, cache_read_cost, total_cost, actual_cost, rate_multiplier, account_rate_multiplier, billing_type, request_type, stream, openai_ws_mode, duration_ms, first_token_ms, user_agent, ip_address, image_count, image_size, media_type, reasoning_effort, cache_ttl_overridden, created_at"
|
const usageLogSelectColumns = "id, user_id, api_key_id, account_id, request_id, model, group_id, subscription_id, input_tokens, output_tokens, cache_creation_tokens, cache_read_tokens, cache_creation_5m_tokens, cache_creation_1h_tokens, input_cost, output_cost, cache_creation_cost, cache_read_cost, total_cost, actual_cost, rate_multiplier, account_rate_multiplier, billing_type, request_type, stream, openai_ws_mode, duration_ms, first_token_ms, user_agent, ip_address, image_count, image_size, media_type, service_tier, reasoning_effort, cache_ttl_overridden, created_at"
|
||||||
|
|
||||||
// dateFormatWhitelist 将 granularity 参数映射为 PostgreSQL TO_CHAR 格式字符串,防止外部输入直接拼入 SQL
|
// dateFormatWhitelist 将 granularity 参数映射为 PostgreSQL TO_CHAR 格式字符串,防止外部输入直接拼入 SQL
|
||||||
var dateFormatWhitelist = map[string]string{
|
var dateFormatWhitelist = map[string]string{
|
||||||
@@ -135,6 +135,7 @@ func (r *usageLogRepository) Create(ctx context.Context, log *service.UsageLog)
|
|||||||
image_count,
|
image_count,
|
||||||
image_size,
|
image_size,
|
||||||
media_type,
|
media_type,
|
||||||
|
service_tier,
|
||||||
reasoning_effort,
|
reasoning_effort,
|
||||||
cache_ttl_overridden,
|
cache_ttl_overridden,
|
||||||
created_at
|
created_at
|
||||||
@@ -144,7 +145,7 @@ func (r *usageLogRepository) Create(ctx context.Context, log *service.UsageLog)
|
|||||||
$8, $9, $10, $11,
|
$8, $9, $10, $11,
|
||||||
$12, $13,
|
$12, $13,
|
||||||
$14, $15, $16, $17, $18, $19,
|
$14, $15, $16, $17, $18, $19,
|
||||||
$20, $21, $22, $23, $24, $25, $26, $27, $28, $29, $30, $31, $32, $33, $34, $35
|
$20, $21, $22, $23, $24, $25, $26, $27, $28, $29, $30, $31, $32, $33, $34, $35, $36
|
||||||
)
|
)
|
||||||
ON CONFLICT (request_id, api_key_id) DO NOTHING
|
ON CONFLICT (request_id, api_key_id) DO NOTHING
|
||||||
RETURNING id, created_at
|
RETURNING id, created_at
|
||||||
@@ -158,6 +159,7 @@ func (r *usageLogRepository) Create(ctx context.Context, log *service.UsageLog)
|
|||||||
ipAddress := nullString(log.IPAddress)
|
ipAddress := nullString(log.IPAddress)
|
||||||
imageSize := nullString(log.ImageSize)
|
imageSize := nullString(log.ImageSize)
|
||||||
mediaType := nullString(log.MediaType)
|
mediaType := nullString(log.MediaType)
|
||||||
|
serviceTier := nullString(log.ServiceTier)
|
||||||
reasoningEffort := nullString(log.ReasoningEffort)
|
reasoningEffort := nullString(log.ReasoningEffort)
|
||||||
|
|
||||||
var requestIDArg any
|
var requestIDArg any
|
||||||
@@ -198,6 +200,7 @@ func (r *usageLogRepository) Create(ctx context.Context, log *service.UsageLog)
|
|||||||
log.ImageCount,
|
log.ImageCount,
|
||||||
imageSize,
|
imageSize,
|
||||||
mediaType,
|
mediaType,
|
||||||
|
serviceTier,
|
||||||
reasoningEffort,
|
reasoningEffort,
|
||||||
log.CacheTTLOverridden,
|
log.CacheTTLOverridden,
|
||||||
createdAt,
|
createdAt,
|
||||||
@@ -2505,6 +2508,7 @@ func scanUsageLog(scanner interface{ Scan(...any) error }) (*service.UsageLog, e
|
|||||||
imageCount int
|
imageCount int
|
||||||
imageSize sql.NullString
|
imageSize sql.NullString
|
||||||
mediaType sql.NullString
|
mediaType sql.NullString
|
||||||
|
serviceTier sql.NullString
|
||||||
reasoningEffort sql.NullString
|
reasoningEffort sql.NullString
|
||||||
cacheTTLOverridden bool
|
cacheTTLOverridden bool
|
||||||
createdAt time.Time
|
createdAt time.Time
|
||||||
@@ -2544,6 +2548,7 @@ func scanUsageLog(scanner interface{ Scan(...any) error }) (*service.UsageLog, e
|
|||||||
&imageCount,
|
&imageCount,
|
||||||
&imageSize,
|
&imageSize,
|
||||||
&mediaType,
|
&mediaType,
|
||||||
|
&serviceTier,
|
||||||
&reasoningEffort,
|
&reasoningEffort,
|
||||||
&cacheTTLOverridden,
|
&cacheTTLOverridden,
|
||||||
&createdAt,
|
&createdAt,
|
||||||
@@ -2614,6 +2619,9 @@ func scanUsageLog(scanner interface{ Scan(...any) error }) (*service.UsageLog, e
|
|||||||
if mediaType.Valid {
|
if mediaType.Valid {
|
||||||
log.MediaType = &mediaType.String
|
log.MediaType = &mediaType.String
|
||||||
}
|
}
|
||||||
|
if serviceTier.Valid {
|
||||||
|
log.ServiceTier = &serviceTier.String
|
||||||
|
}
|
||||||
if reasoningEffort.Valid {
|
if reasoningEffort.Valid {
|
||||||
log.ReasoningEffort = &reasoningEffort.String
|
log.ReasoningEffort = &reasoningEffort.String
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -71,6 +71,7 @@ func TestUsageLogRepositoryCreateSyncRequestTypeAndLegacyFields(t *testing.T) {
|
|||||||
log.ImageCount,
|
log.ImageCount,
|
||||||
sqlmock.AnyArg(), // image_size
|
sqlmock.AnyArg(), // image_size
|
||||||
sqlmock.AnyArg(), // media_type
|
sqlmock.AnyArg(), // media_type
|
||||||
|
sqlmock.AnyArg(), // service_tier
|
||||||
sqlmock.AnyArg(), // reasoning_effort
|
sqlmock.AnyArg(), // reasoning_effort
|
||||||
log.CacheTTLOverridden,
|
log.CacheTTLOverridden,
|
||||||
createdAt,
|
createdAt,
|
||||||
@@ -81,12 +82,76 @@ func TestUsageLogRepositoryCreateSyncRequestTypeAndLegacyFields(t *testing.T) {
|
|||||||
require.NoError(t, err)
|
require.NoError(t, err)
|
||||||
require.True(t, inserted)
|
require.True(t, inserted)
|
||||||
require.Equal(t, int64(99), log.ID)
|
require.Equal(t, int64(99), log.ID)
|
||||||
|
require.Nil(t, log.ServiceTier)
|
||||||
require.Equal(t, service.RequestTypeWSV2, log.RequestType)
|
require.Equal(t, service.RequestTypeWSV2, log.RequestType)
|
||||||
require.True(t, log.Stream)
|
require.True(t, log.Stream)
|
||||||
require.True(t, log.OpenAIWSMode)
|
require.True(t, log.OpenAIWSMode)
|
||||||
require.NoError(t, mock.ExpectationsWereMet())
|
require.NoError(t, mock.ExpectationsWereMet())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestUsageLogRepositoryCreate_PersistsServiceTier(t *testing.T) {
|
||||||
|
db, mock := newSQLMock(t)
|
||||||
|
repo := &usageLogRepository{sql: db}
|
||||||
|
|
||||||
|
createdAt := time.Date(2025, 1, 2, 12, 0, 0, 0, time.UTC)
|
||||||
|
serviceTier := "priority"
|
||||||
|
log := &service.UsageLog{
|
||||||
|
UserID: 1,
|
||||||
|
APIKeyID: 2,
|
||||||
|
AccountID: 3,
|
||||||
|
RequestID: "req-service-tier",
|
||||||
|
Model: "gpt-5.4",
|
||||||
|
ServiceTier: &serviceTier,
|
||||||
|
CreatedAt: createdAt,
|
||||||
|
}
|
||||||
|
|
||||||
|
mock.ExpectQuery("INSERT INTO usage_logs").
|
||||||
|
WithArgs(
|
||||||
|
log.UserID,
|
||||||
|
log.APIKeyID,
|
||||||
|
log.AccountID,
|
||||||
|
log.RequestID,
|
||||||
|
log.Model,
|
||||||
|
sqlmock.AnyArg(),
|
||||||
|
sqlmock.AnyArg(),
|
||||||
|
log.InputTokens,
|
||||||
|
log.OutputTokens,
|
||||||
|
log.CacheCreationTokens,
|
||||||
|
log.CacheReadTokens,
|
||||||
|
log.CacheCreation5mTokens,
|
||||||
|
log.CacheCreation1hTokens,
|
||||||
|
log.InputCost,
|
||||||
|
log.OutputCost,
|
||||||
|
log.CacheCreationCost,
|
||||||
|
log.CacheReadCost,
|
||||||
|
log.TotalCost,
|
||||||
|
log.ActualCost,
|
||||||
|
log.RateMultiplier,
|
||||||
|
log.AccountRateMultiplier,
|
||||||
|
log.BillingType,
|
||||||
|
int16(service.RequestTypeSync),
|
||||||
|
false,
|
||||||
|
false,
|
||||||
|
sqlmock.AnyArg(),
|
||||||
|
sqlmock.AnyArg(),
|
||||||
|
sqlmock.AnyArg(),
|
||||||
|
sqlmock.AnyArg(),
|
||||||
|
log.ImageCount,
|
||||||
|
sqlmock.AnyArg(),
|
||||||
|
sqlmock.AnyArg(),
|
||||||
|
serviceTier,
|
||||||
|
sqlmock.AnyArg(),
|
||||||
|
log.CacheTTLOverridden,
|
||||||
|
createdAt,
|
||||||
|
).
|
||||||
|
WillReturnRows(sqlmock.NewRows([]string{"id", "created_at"}).AddRow(int64(100), createdAt))
|
||||||
|
|
||||||
|
inserted, err := repo.Create(context.Background(), log)
|
||||||
|
require.NoError(t, err)
|
||||||
|
require.True(t, inserted)
|
||||||
|
require.NoError(t, mock.ExpectationsWereMet())
|
||||||
|
}
|
||||||
|
|
||||||
func TestUsageLogRepositoryListWithFiltersRequestTypePriority(t *testing.T) {
|
func TestUsageLogRepositoryListWithFiltersRequestTypePriority(t *testing.T) {
|
||||||
db, mock := newSQLMock(t)
|
db, mock := newSQLMock(t)
|
||||||
repo := &usageLogRepository{sql: db}
|
repo := &usageLogRepository{sql: db}
|
||||||
@@ -280,11 +345,14 @@ func TestScanUsageLogRequestTypeAndLegacyFallback(t *testing.T) {
|
|||||||
0,
|
0,
|
||||||
sql.NullString{},
|
sql.NullString{},
|
||||||
sql.NullString{},
|
sql.NullString{},
|
||||||
|
sql.NullString{Valid: true, String: "priority"},
|
||||||
sql.NullString{},
|
sql.NullString{},
|
||||||
false,
|
false,
|
||||||
now,
|
now,
|
||||||
}})
|
}})
|
||||||
require.NoError(t, err)
|
require.NoError(t, err)
|
||||||
|
require.NotNil(t, log.ServiceTier)
|
||||||
|
require.Equal(t, "priority", *log.ServiceTier)
|
||||||
require.Equal(t, service.RequestTypeWSV2, log.RequestType)
|
require.Equal(t, service.RequestTypeWSV2, log.RequestType)
|
||||||
require.True(t, log.Stream)
|
require.True(t, log.Stream)
|
||||||
require.True(t, log.OpenAIWSMode)
|
require.True(t, log.OpenAIWSMode)
|
||||||
@@ -316,13 +384,53 @@ func TestScanUsageLogRequestTypeAndLegacyFallback(t *testing.T) {
|
|||||||
0,
|
0,
|
||||||
sql.NullString{},
|
sql.NullString{},
|
||||||
sql.NullString{},
|
sql.NullString{},
|
||||||
|
sql.NullString{Valid: true, String: "flex"},
|
||||||
sql.NullString{},
|
sql.NullString{},
|
||||||
false,
|
false,
|
||||||
now,
|
now,
|
||||||
}})
|
}})
|
||||||
require.NoError(t, err)
|
require.NoError(t, err)
|
||||||
|
require.NotNil(t, log.ServiceTier)
|
||||||
|
require.Equal(t, "flex", *log.ServiceTier)
|
||||||
require.Equal(t, service.RequestTypeStream, log.RequestType)
|
require.Equal(t, service.RequestTypeStream, log.RequestType)
|
||||||
require.True(t, log.Stream)
|
require.True(t, log.Stream)
|
||||||
require.False(t, log.OpenAIWSMode)
|
require.False(t, log.OpenAIWSMode)
|
||||||
})
|
})
|
||||||
|
|
||||||
|
t.Run("service_tier_is_scanned", func(t *testing.T) {
|
||||||
|
now := time.Now().UTC()
|
||||||
|
log, err := scanUsageLog(usageLogScannerStub{values: []any{
|
||||||
|
int64(3),
|
||||||
|
int64(12),
|
||||||
|
int64(22),
|
||||||
|
int64(32),
|
||||||
|
sql.NullString{Valid: true, String: "req-3"},
|
||||||
|
"gpt-5.4",
|
||||||
|
sql.NullInt64{},
|
||||||
|
sql.NullInt64{},
|
||||||
|
1, 2, 3, 4, 5, 6,
|
||||||
|
0.1, 0.2, 0.3, 0.4, 1.0, 0.9,
|
||||||
|
1.0,
|
||||||
|
sql.NullFloat64{},
|
||||||
|
int16(service.BillingTypeBalance),
|
||||||
|
int16(service.RequestTypeSync),
|
||||||
|
false,
|
||||||
|
false,
|
||||||
|
sql.NullInt64{},
|
||||||
|
sql.NullInt64{},
|
||||||
|
sql.NullString{},
|
||||||
|
sql.NullString{},
|
||||||
|
0,
|
||||||
|
sql.NullString{},
|
||||||
|
sql.NullString{},
|
||||||
|
sql.NullString{Valid: true, String: "priority"},
|
||||||
|
sql.NullString{},
|
||||||
|
false,
|
||||||
|
now,
|
||||||
|
}})
|
||||||
|
require.NoError(t, err)
|
||||||
|
require.NotNil(t, log.ServiceTier)
|
||||||
|
require.Equal(t, "priority", *log.ServiceTier)
|
||||||
|
})
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -43,16 +43,19 @@ type BillingCache interface {
|
|||||||
|
|
||||||
// ModelPricing 模型价格配置(per-token价格,与LiteLLM格式一致)
|
// ModelPricing 模型价格配置(per-token价格,与LiteLLM格式一致)
|
||||||
type ModelPricing struct {
|
type ModelPricing struct {
|
||||||
InputPricePerToken float64 // 每token输入价格 (USD)
|
InputPricePerToken float64 // 每token输入价格 (USD)
|
||||||
OutputPricePerToken float64 // 每token输出价格 (USD)
|
InputPricePerTokenPriority float64 // priority service tier 下每token输入价格 (USD)
|
||||||
CacheCreationPricePerToken float64 // 缓存创建每token价格 (USD)
|
OutputPricePerToken float64 // 每token输出价格 (USD)
|
||||||
CacheReadPricePerToken float64 // 缓存读取每token价格 (USD)
|
OutputPricePerTokenPriority float64 // priority service tier 下每token输出价格 (USD)
|
||||||
CacheCreation5mPrice float64 // 5分钟缓存创建每token价格 (USD)
|
CacheCreationPricePerToken float64 // 缓存创建每token价格 (USD)
|
||||||
CacheCreation1hPrice float64 // 1小时缓存创建每token价格 (USD)
|
CacheReadPricePerToken float64 // 缓存读取每token价格 (USD)
|
||||||
SupportsCacheBreakdown bool // 是否支持详细的缓存分类
|
CacheReadPricePerTokenPriority float64 // priority service tier 下缓存读取每token价格 (USD)
|
||||||
LongContextInputThreshold int // 超过阈值后按整次会话提升输入价格
|
CacheCreation5mPrice float64 // 5分钟缓存创建每token价格 (USD)
|
||||||
LongContextInputMultiplier float64 // 长上下文整次会话输入倍率
|
CacheCreation1hPrice float64 // 1小时缓存创建每token价格 (USD)
|
||||||
LongContextOutputMultiplier float64 // 长上下文整次会话输出倍率
|
SupportsCacheBreakdown bool // 是否支持详细的缓存分类
|
||||||
|
LongContextInputThreshold int // 超过阈值后按整次会话提升输入价格
|
||||||
|
LongContextInputMultiplier float64 // 长上下文整次会话输入倍率
|
||||||
|
LongContextOutputMultiplier float64 // 长上下文整次会话输出倍率
|
||||||
}
|
}
|
||||||
|
|
||||||
const (
|
const (
|
||||||
@@ -61,6 +64,28 @@ const (
|
|||||||
openAIGPT54LongContextOutputMultiplier = 1.5
|
openAIGPT54LongContextOutputMultiplier = 1.5
|
||||||
)
|
)
|
||||||
|
|
||||||
|
func normalizeBillingServiceTier(serviceTier string) string {
|
||||||
|
return strings.ToLower(strings.TrimSpace(serviceTier))
|
||||||
|
}
|
||||||
|
|
||||||
|
func usePriorityServiceTierPricing(serviceTier string, pricing *ModelPricing) bool {
|
||||||
|
if pricing == nil || normalizeBillingServiceTier(serviceTier) != "priority" {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
return pricing.InputPricePerTokenPriority > 0 || pricing.OutputPricePerTokenPriority > 0 || pricing.CacheReadPricePerTokenPriority > 0
|
||||||
|
}
|
||||||
|
|
||||||
|
func serviceTierCostMultiplier(serviceTier string) float64 {
|
||||||
|
switch normalizeBillingServiceTier(serviceTier) {
|
||||||
|
case "priority":
|
||||||
|
return 2.0
|
||||||
|
case "flex":
|
||||||
|
return 0.5
|
||||||
|
default:
|
||||||
|
return 1.0
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// UsageTokens 使用的token数量
|
// UsageTokens 使用的token数量
|
||||||
type UsageTokens struct {
|
type UsageTokens struct {
|
||||||
InputTokens int
|
InputTokens int
|
||||||
@@ -173,30 +198,60 @@ func (s *BillingService) initFallbackPricing() {
|
|||||||
|
|
||||||
// OpenAI GPT-5.1(本地兜底,防止动态定价不可用时拒绝计费)
|
// OpenAI GPT-5.1(本地兜底,防止动态定价不可用时拒绝计费)
|
||||||
s.fallbackPrices["gpt-5.1"] = &ModelPricing{
|
s.fallbackPrices["gpt-5.1"] = &ModelPricing{
|
||||||
InputPricePerToken: 1.25e-6, // $1.25 per MTok
|
InputPricePerToken: 1.25e-6, // $1.25 per MTok
|
||||||
OutputPricePerToken: 10e-6, // $10 per MTok
|
InputPricePerTokenPriority: 2.5e-6, // $2.5 per MTok
|
||||||
CacheCreationPricePerToken: 1.25e-6, // $1.25 per MTok
|
OutputPricePerToken: 10e-6, // $10 per MTok
|
||||||
CacheReadPricePerToken: 0.125e-6,
|
OutputPricePerTokenPriority: 20e-6, // $20 per MTok
|
||||||
SupportsCacheBreakdown: false,
|
CacheCreationPricePerToken: 1.25e-6, // $1.25 per MTok
|
||||||
|
CacheReadPricePerToken: 0.125e-6,
|
||||||
|
CacheReadPricePerTokenPriority: 0.25e-6,
|
||||||
|
SupportsCacheBreakdown: false,
|
||||||
}
|
}
|
||||||
// OpenAI GPT-5.4(业务指定价格)
|
// OpenAI GPT-5.4(业务指定价格)
|
||||||
s.fallbackPrices["gpt-5.4"] = &ModelPricing{
|
s.fallbackPrices["gpt-5.4"] = &ModelPricing{
|
||||||
InputPricePerToken: 2.5e-6, // $2.5 per MTok
|
InputPricePerToken: 2.5e-6, // $2.5 per MTok
|
||||||
OutputPricePerToken: 15e-6, // $15 per MTok
|
InputPricePerTokenPriority: 5e-6, // $5 per MTok
|
||||||
CacheCreationPricePerToken: 2.5e-6, // $2.5 per MTok
|
OutputPricePerToken: 15e-6, // $15 per MTok
|
||||||
CacheReadPricePerToken: 0.25e-6, // $0.25 per MTok
|
OutputPricePerTokenPriority: 30e-6, // $30 per MTok
|
||||||
SupportsCacheBreakdown: false,
|
CacheCreationPricePerToken: 2.5e-6, // $2.5 per MTok
|
||||||
LongContextInputThreshold: openAIGPT54LongContextInputThreshold,
|
CacheReadPricePerToken: 0.25e-6, // $0.25 per MTok
|
||||||
LongContextInputMultiplier: openAIGPT54LongContextInputMultiplier,
|
CacheReadPricePerTokenPriority: 0.5e-6, // $0.5 per MTok
|
||||||
LongContextOutputMultiplier: openAIGPT54LongContextOutputMultiplier,
|
SupportsCacheBreakdown: false,
|
||||||
|
LongContextInputThreshold: openAIGPT54LongContextInputThreshold,
|
||||||
|
LongContextInputMultiplier: openAIGPT54LongContextInputMultiplier,
|
||||||
|
LongContextOutputMultiplier: openAIGPT54LongContextOutputMultiplier,
|
||||||
|
}
|
||||||
|
// OpenAI GPT-5.2(本地兜底)
|
||||||
|
s.fallbackPrices["gpt-5.2"] = &ModelPricing{
|
||||||
|
InputPricePerToken: 1.75e-6,
|
||||||
|
InputPricePerTokenPriority: 3.5e-6,
|
||||||
|
OutputPricePerToken: 14e-6,
|
||||||
|
OutputPricePerTokenPriority: 28e-6,
|
||||||
|
CacheCreationPricePerToken: 1.75e-6,
|
||||||
|
CacheReadPricePerToken: 0.175e-6,
|
||||||
|
CacheReadPricePerTokenPriority: 0.35e-6,
|
||||||
|
SupportsCacheBreakdown: false,
|
||||||
}
|
}
|
||||||
// Codex 族兜底统一按 GPT-5.1 Codex 价格计费
|
// Codex 族兜底统一按 GPT-5.1 Codex 价格计费
|
||||||
s.fallbackPrices["gpt-5.1-codex"] = &ModelPricing{
|
s.fallbackPrices["gpt-5.1-codex"] = &ModelPricing{
|
||||||
InputPricePerToken: 1.5e-6, // $1.5 per MTok
|
InputPricePerToken: 1.5e-6, // $1.5 per MTok
|
||||||
OutputPricePerToken: 12e-6, // $12 per MTok
|
InputPricePerTokenPriority: 3e-6, // $3 per MTok
|
||||||
CacheCreationPricePerToken: 1.5e-6, // $1.5 per MTok
|
OutputPricePerToken: 12e-6, // $12 per MTok
|
||||||
CacheReadPricePerToken: 0.15e-6,
|
OutputPricePerTokenPriority: 24e-6, // $24 per MTok
|
||||||
SupportsCacheBreakdown: false,
|
CacheCreationPricePerToken: 1.5e-6, // $1.5 per MTok
|
||||||
|
CacheReadPricePerToken: 0.15e-6,
|
||||||
|
CacheReadPricePerTokenPriority: 0.3e-6,
|
||||||
|
SupportsCacheBreakdown: false,
|
||||||
|
}
|
||||||
|
s.fallbackPrices["gpt-5.2-codex"] = &ModelPricing{
|
||||||
|
InputPricePerToken: 1.75e-6,
|
||||||
|
InputPricePerTokenPriority: 3.5e-6,
|
||||||
|
OutputPricePerToken: 14e-6,
|
||||||
|
OutputPricePerTokenPriority: 28e-6,
|
||||||
|
CacheCreationPricePerToken: 1.75e-6,
|
||||||
|
CacheReadPricePerToken: 0.175e-6,
|
||||||
|
CacheReadPricePerTokenPriority: 0.35e-6,
|
||||||
|
SupportsCacheBreakdown: false,
|
||||||
}
|
}
|
||||||
s.fallbackPrices["gpt-5.3-codex"] = s.fallbackPrices["gpt-5.1-codex"]
|
s.fallbackPrices["gpt-5.3-codex"] = s.fallbackPrices["gpt-5.1-codex"]
|
||||||
}
|
}
|
||||||
@@ -241,6 +296,10 @@ func (s *BillingService) getFallbackPricing(model string) *ModelPricing {
|
|||||||
switch normalized {
|
switch normalized {
|
||||||
case "gpt-5.4":
|
case "gpt-5.4":
|
||||||
return s.fallbackPrices["gpt-5.4"]
|
return s.fallbackPrices["gpt-5.4"]
|
||||||
|
case "gpt-5.2":
|
||||||
|
return s.fallbackPrices["gpt-5.2"]
|
||||||
|
case "gpt-5.2-codex":
|
||||||
|
return s.fallbackPrices["gpt-5.2-codex"]
|
||||||
case "gpt-5.3-codex":
|
case "gpt-5.3-codex":
|
||||||
return s.fallbackPrices["gpt-5.3-codex"]
|
return s.fallbackPrices["gpt-5.3-codex"]
|
||||||
case "gpt-5.1-codex", "gpt-5.1-codex-max", "gpt-5.1-codex-mini", "codex-mini-latest":
|
case "gpt-5.1-codex", "gpt-5.1-codex-max", "gpt-5.1-codex-mini", "codex-mini-latest":
|
||||||
@@ -269,16 +328,19 @@ func (s *BillingService) GetModelPricing(model string) (*ModelPricing, error) {
|
|||||||
price1h := litellmPricing.CacheCreationInputTokenCostAbove1hr
|
price1h := litellmPricing.CacheCreationInputTokenCostAbove1hr
|
||||||
enableBreakdown := price1h > 0 && price1h > price5m
|
enableBreakdown := price1h > 0 && price1h > price5m
|
||||||
return s.applyModelSpecificPricingPolicy(model, &ModelPricing{
|
return s.applyModelSpecificPricingPolicy(model, &ModelPricing{
|
||||||
InputPricePerToken: litellmPricing.InputCostPerToken,
|
InputPricePerToken: litellmPricing.InputCostPerToken,
|
||||||
OutputPricePerToken: litellmPricing.OutputCostPerToken,
|
InputPricePerTokenPriority: litellmPricing.InputCostPerTokenPriority,
|
||||||
CacheCreationPricePerToken: litellmPricing.CacheCreationInputTokenCost,
|
OutputPricePerToken: litellmPricing.OutputCostPerToken,
|
||||||
CacheReadPricePerToken: litellmPricing.CacheReadInputTokenCost,
|
OutputPricePerTokenPriority: litellmPricing.OutputCostPerTokenPriority,
|
||||||
CacheCreation5mPrice: price5m,
|
CacheCreationPricePerToken: litellmPricing.CacheCreationInputTokenCost,
|
||||||
CacheCreation1hPrice: price1h,
|
CacheReadPricePerToken: litellmPricing.CacheReadInputTokenCost,
|
||||||
SupportsCacheBreakdown: enableBreakdown,
|
CacheReadPricePerTokenPriority: litellmPricing.CacheReadInputTokenCostPriority,
|
||||||
LongContextInputThreshold: litellmPricing.LongContextInputTokenThreshold,
|
CacheCreation5mPrice: price5m,
|
||||||
LongContextInputMultiplier: litellmPricing.LongContextInputCostMultiplier,
|
CacheCreation1hPrice: price1h,
|
||||||
LongContextOutputMultiplier: litellmPricing.LongContextOutputCostMultiplier,
|
SupportsCacheBreakdown: enableBreakdown,
|
||||||
|
LongContextInputThreshold: litellmPricing.LongContextInputTokenThreshold,
|
||||||
|
LongContextInputMultiplier: litellmPricing.LongContextInputCostMultiplier,
|
||||||
|
LongContextOutputMultiplier: litellmPricing.LongContextOutputCostMultiplier,
|
||||||
}), nil
|
}), nil
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -295,6 +357,10 @@ func (s *BillingService) GetModelPricing(model string) (*ModelPricing, error) {
|
|||||||
|
|
||||||
// CalculateCost 计算使用费用
|
// CalculateCost 计算使用费用
|
||||||
func (s *BillingService) CalculateCost(model string, tokens UsageTokens, rateMultiplier float64) (*CostBreakdown, error) {
|
func (s *BillingService) CalculateCost(model string, tokens UsageTokens, rateMultiplier float64) (*CostBreakdown, error) {
|
||||||
|
return s.CalculateCostWithServiceTier(model, tokens, rateMultiplier, "")
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *BillingService) CalculateCostWithServiceTier(model string, tokens UsageTokens, rateMultiplier float64, serviceTier string) (*CostBreakdown, error) {
|
||||||
pricing, err := s.GetModelPricing(model)
|
pricing, err := s.GetModelPricing(model)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
@@ -303,6 +369,21 @@ func (s *BillingService) CalculateCost(model string, tokens UsageTokens, rateMul
|
|||||||
breakdown := &CostBreakdown{}
|
breakdown := &CostBreakdown{}
|
||||||
inputPricePerToken := pricing.InputPricePerToken
|
inputPricePerToken := pricing.InputPricePerToken
|
||||||
outputPricePerToken := pricing.OutputPricePerToken
|
outputPricePerToken := pricing.OutputPricePerToken
|
||||||
|
cacheReadPricePerToken := pricing.CacheReadPricePerToken
|
||||||
|
tierMultiplier := 1.0
|
||||||
|
if usePriorityServiceTierPricing(serviceTier, pricing) {
|
||||||
|
if pricing.InputPricePerTokenPriority > 0 {
|
||||||
|
inputPricePerToken = pricing.InputPricePerTokenPriority
|
||||||
|
}
|
||||||
|
if pricing.OutputPricePerTokenPriority > 0 {
|
||||||
|
outputPricePerToken = pricing.OutputPricePerTokenPriority
|
||||||
|
}
|
||||||
|
if pricing.CacheReadPricePerTokenPriority > 0 {
|
||||||
|
cacheReadPricePerToken = pricing.CacheReadPricePerTokenPriority
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
tierMultiplier = serviceTierCostMultiplier(serviceTier)
|
||||||
|
}
|
||||||
if s.shouldApplySessionLongContextPricing(tokens, pricing) {
|
if s.shouldApplySessionLongContextPricing(tokens, pricing) {
|
||||||
inputPricePerToken *= pricing.LongContextInputMultiplier
|
inputPricePerToken *= pricing.LongContextInputMultiplier
|
||||||
outputPricePerToken *= pricing.LongContextOutputMultiplier
|
outputPricePerToken *= pricing.LongContextOutputMultiplier
|
||||||
@@ -329,7 +410,14 @@ func (s *BillingService) CalculateCost(model string, tokens UsageTokens, rateMul
|
|||||||
breakdown.CacheCreationCost = float64(tokens.CacheCreationTokens) * pricing.CacheCreationPricePerToken
|
breakdown.CacheCreationCost = float64(tokens.CacheCreationTokens) * pricing.CacheCreationPricePerToken
|
||||||
}
|
}
|
||||||
|
|
||||||
breakdown.CacheReadCost = float64(tokens.CacheReadTokens) * pricing.CacheReadPricePerToken
|
breakdown.CacheReadCost = float64(tokens.CacheReadTokens) * cacheReadPricePerToken
|
||||||
|
|
||||||
|
if tierMultiplier != 1.0 {
|
||||||
|
breakdown.InputCost *= tierMultiplier
|
||||||
|
breakdown.OutputCost *= tierMultiplier
|
||||||
|
breakdown.CacheCreationCost *= tierMultiplier
|
||||||
|
breakdown.CacheReadCost *= tierMultiplier
|
||||||
|
}
|
||||||
|
|
||||||
// 计算总费用
|
// 计算总费用
|
||||||
breakdown.TotalCost = breakdown.InputCost + breakdown.OutputCost +
|
breakdown.TotalCost = breakdown.InputCost + breakdown.OutputCost +
|
||||||
|
|||||||
@@ -522,3 +522,189 @@ func TestCalculateCost_LargeTokenCount(t *testing.T) {
|
|||||||
require.False(t, math.IsNaN(cost.TotalCost))
|
require.False(t, math.IsNaN(cost.TotalCost))
|
||||||
require.False(t, math.IsInf(cost.TotalCost, 0))
|
require.False(t, math.IsInf(cost.TotalCost, 0))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestServiceTierCostMultiplier(t *testing.T) {
|
||||||
|
require.InDelta(t, 2.0, serviceTierCostMultiplier("priority"), 1e-12)
|
||||||
|
require.InDelta(t, 2.0, serviceTierCostMultiplier(" Priority "), 1e-12)
|
||||||
|
require.InDelta(t, 0.5, serviceTierCostMultiplier("flex"), 1e-12)
|
||||||
|
require.InDelta(t, 1.0, serviceTierCostMultiplier(""), 1e-12)
|
||||||
|
require.InDelta(t, 1.0, serviceTierCostMultiplier("default"), 1e-12)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestCalculateCostWithServiceTier_OpenAIPriorityUsesPriorityPricing(t *testing.T) {
|
||||||
|
svc := newTestBillingService()
|
||||||
|
tokens := UsageTokens{InputTokens: 100, OutputTokens: 50, CacheReadTokens: 20}
|
||||||
|
|
||||||
|
baseCost, err := svc.CalculateCost("gpt-5.1-codex", tokens, 1.0)
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
priorityCost, err := svc.CalculateCostWithServiceTier("gpt-5.1-codex", tokens, 1.0, "priority")
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
require.InDelta(t, baseCost.InputCost*2, priorityCost.InputCost, 1e-10)
|
||||||
|
require.InDelta(t, baseCost.OutputCost*2, priorityCost.OutputCost, 1e-10)
|
||||||
|
require.InDelta(t, baseCost.CacheReadCost*2, priorityCost.CacheReadCost, 1e-10)
|
||||||
|
require.InDelta(t, baseCost.TotalCost*2, priorityCost.TotalCost, 1e-10)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestCalculateCostWithServiceTier_FlexAppliesHalfMultiplier(t *testing.T) {
|
||||||
|
svc := newTestBillingService()
|
||||||
|
tokens := UsageTokens{InputTokens: 100, OutputTokens: 50, CacheCreationTokens: 40, CacheReadTokens: 20}
|
||||||
|
|
||||||
|
baseCost, err := svc.CalculateCost("gpt-5.4", tokens, 1.0)
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
flexCost, err := svc.CalculateCostWithServiceTier("gpt-5.4", tokens, 1.0, "flex")
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
require.InDelta(t, baseCost.InputCost*0.5, flexCost.InputCost, 1e-10)
|
||||||
|
require.InDelta(t, baseCost.OutputCost*0.5, flexCost.OutputCost, 1e-10)
|
||||||
|
require.InDelta(t, baseCost.CacheCreationCost*0.5, flexCost.CacheCreationCost, 1e-10)
|
||||||
|
require.InDelta(t, baseCost.CacheReadCost*0.5, flexCost.CacheReadCost, 1e-10)
|
||||||
|
require.InDelta(t, baseCost.TotalCost*0.5, flexCost.TotalCost, 1e-10)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestCalculateCostWithServiceTier_PriorityFallsBackToTierMultiplierWithoutExplicitPriorityPrice(t *testing.T) {
|
||||||
|
svc := newTestBillingService()
|
||||||
|
tokens := UsageTokens{InputTokens: 120, OutputTokens: 30, CacheCreationTokens: 12, CacheReadTokens: 8}
|
||||||
|
|
||||||
|
baseCost, err := svc.CalculateCost("claude-sonnet-4", tokens, 1.0)
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
priorityCost, err := svc.CalculateCostWithServiceTier("claude-sonnet-4", tokens, 1.0, "priority")
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
require.InDelta(t, baseCost.InputCost*2, priorityCost.InputCost, 1e-10)
|
||||||
|
require.InDelta(t, baseCost.OutputCost*2, priorityCost.OutputCost, 1e-10)
|
||||||
|
require.InDelta(t, baseCost.CacheCreationCost*2, priorityCost.CacheCreationCost, 1e-10)
|
||||||
|
require.InDelta(t, baseCost.CacheReadCost*2, priorityCost.CacheReadCost, 1e-10)
|
||||||
|
require.InDelta(t, baseCost.TotalCost*2, priorityCost.TotalCost, 1e-10)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestBillingServiceGetModelPricing_UsesDynamicPriorityFields(t *testing.T) {
|
||||||
|
pricingSvc := &PricingService{
|
||||||
|
pricingData: map[string]*LiteLLMModelPricing{
|
||||||
|
"gpt-5.4": {
|
||||||
|
InputCostPerToken: 2.5e-6,
|
||||||
|
InputCostPerTokenPriority: 5e-6,
|
||||||
|
OutputCostPerToken: 15e-6,
|
||||||
|
OutputCostPerTokenPriority: 30e-6,
|
||||||
|
CacheCreationInputTokenCost: 2.5e-6,
|
||||||
|
CacheReadInputTokenCost: 0.25e-6,
|
||||||
|
CacheReadInputTokenCostPriority: 0.5e-6,
|
||||||
|
LongContextInputTokenThreshold: 272000,
|
||||||
|
LongContextInputCostMultiplier: 2.0,
|
||||||
|
LongContextOutputCostMultiplier: 1.5,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
svc := NewBillingService(&config.Config{}, pricingSvc)
|
||||||
|
|
||||||
|
pricing, err := svc.GetModelPricing("gpt-5.4")
|
||||||
|
require.NoError(t, err)
|
||||||
|
require.InDelta(t, 2.5e-6, pricing.InputPricePerToken, 1e-12)
|
||||||
|
require.InDelta(t, 5e-6, pricing.InputPricePerTokenPriority, 1e-12)
|
||||||
|
require.InDelta(t, 15e-6, pricing.OutputPricePerToken, 1e-12)
|
||||||
|
require.InDelta(t, 30e-6, pricing.OutputPricePerTokenPriority, 1e-12)
|
||||||
|
require.InDelta(t, 0.25e-6, pricing.CacheReadPricePerToken, 1e-12)
|
||||||
|
require.InDelta(t, 0.5e-6, pricing.CacheReadPricePerTokenPriority, 1e-12)
|
||||||
|
require.Equal(t, 272000, pricing.LongContextInputThreshold)
|
||||||
|
require.InDelta(t, 2.0, pricing.LongContextInputMultiplier, 1e-12)
|
||||||
|
require.InDelta(t, 1.5, pricing.LongContextOutputMultiplier, 1e-12)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestBillingServiceGetModelPricing_OpenAIFallbackGpt52Variants(t *testing.T) {
|
||||||
|
svc := newTestBillingService()
|
||||||
|
|
||||||
|
gpt52, err := svc.GetModelPricing("gpt-5.2")
|
||||||
|
require.NoError(t, err)
|
||||||
|
require.NotNil(t, gpt52)
|
||||||
|
require.InDelta(t, 1.75e-6, gpt52.InputPricePerToken, 1e-12)
|
||||||
|
require.InDelta(t, 3.5e-6, gpt52.InputPricePerTokenPriority, 1e-12)
|
||||||
|
|
||||||
|
gpt52Codex, err := svc.GetModelPricing("gpt-5.2-codex")
|
||||||
|
require.NoError(t, err)
|
||||||
|
require.NotNil(t, gpt52Codex)
|
||||||
|
require.InDelta(t, 1.75e-6, gpt52Codex.InputPricePerToken, 1e-12)
|
||||||
|
require.InDelta(t, 3.5e-6, gpt52Codex.InputPricePerTokenPriority, 1e-12)
|
||||||
|
require.InDelta(t, 28e-6, gpt52Codex.OutputPricePerTokenPriority, 1e-12)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestCalculateCostWithServiceTier_PriorityFallsBackToTierMultiplierWhenExplicitPriceMissing(t *testing.T) {
|
||||||
|
svc := NewBillingService(&config.Config{}, &PricingService{
|
||||||
|
pricingData: map[string]*LiteLLMModelPricing{
|
||||||
|
"custom-no-priority": {
|
||||||
|
InputCostPerToken: 1e-6,
|
||||||
|
OutputCostPerToken: 2e-6,
|
||||||
|
CacheCreationInputTokenCost: 0.5e-6,
|
||||||
|
CacheReadInputTokenCost: 0.25e-6,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
})
|
||||||
|
tokens := UsageTokens{InputTokens: 100, OutputTokens: 50, CacheCreationTokens: 40, CacheReadTokens: 20}
|
||||||
|
|
||||||
|
baseCost, err := svc.CalculateCost("custom-no-priority", tokens, 1.0)
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
priorityCost, err := svc.CalculateCostWithServiceTier("custom-no-priority", tokens, 1.0, "priority")
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
require.InDelta(t, baseCost.InputCost*2, priorityCost.InputCost, 1e-10)
|
||||||
|
require.InDelta(t, baseCost.OutputCost*2, priorityCost.OutputCost, 1e-10)
|
||||||
|
require.InDelta(t, baseCost.CacheCreationCost*2, priorityCost.CacheCreationCost, 1e-10)
|
||||||
|
require.InDelta(t, baseCost.CacheReadCost*2, priorityCost.CacheReadCost, 1e-10)
|
||||||
|
require.InDelta(t, baseCost.TotalCost*2, priorityCost.TotalCost, 1e-10)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestGetModelPricing_OpenAIGpt52FallbacksExposePriorityPrices(t *testing.T) {
|
||||||
|
svc := newTestBillingService()
|
||||||
|
|
||||||
|
gpt52, err := svc.GetModelPricing("gpt-5.2")
|
||||||
|
require.NoError(t, err)
|
||||||
|
require.InDelta(t, 1.75e-6, gpt52.InputPricePerToken, 1e-12)
|
||||||
|
require.InDelta(t, 3.5e-6, gpt52.InputPricePerTokenPriority, 1e-12)
|
||||||
|
require.InDelta(t, 14e-6, gpt52.OutputPricePerToken, 1e-12)
|
||||||
|
require.InDelta(t, 28e-6, gpt52.OutputPricePerTokenPriority, 1e-12)
|
||||||
|
|
||||||
|
gpt52Codex, err := svc.GetModelPricing("gpt-5.2-codex")
|
||||||
|
require.NoError(t, err)
|
||||||
|
require.InDelta(t, 1.75e-6, gpt52Codex.InputPricePerToken, 1e-12)
|
||||||
|
require.InDelta(t, 3.5e-6, gpt52Codex.InputPricePerTokenPriority, 1e-12)
|
||||||
|
require.InDelta(t, 14e-6, gpt52Codex.OutputPricePerToken, 1e-12)
|
||||||
|
require.InDelta(t, 28e-6, gpt52Codex.OutputPricePerTokenPriority, 1e-12)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestGetModelPricing_MapsDynamicPriorityFieldsIntoBillingPricing(t *testing.T) {
|
||||||
|
svc := NewBillingService(&config.Config{}, &PricingService{
|
||||||
|
pricingData: map[string]*LiteLLMModelPricing{
|
||||||
|
"dynamic-tier-model": {
|
||||||
|
InputCostPerToken: 1e-6,
|
||||||
|
InputCostPerTokenPriority: 2e-6,
|
||||||
|
OutputCostPerToken: 3e-6,
|
||||||
|
OutputCostPerTokenPriority: 6e-6,
|
||||||
|
CacheCreationInputTokenCost: 4e-6,
|
||||||
|
CacheCreationInputTokenCostAbove1hr: 5e-6,
|
||||||
|
CacheReadInputTokenCost: 7e-7,
|
||||||
|
CacheReadInputTokenCostPriority: 8e-7,
|
||||||
|
LongContextInputTokenThreshold: 999,
|
||||||
|
LongContextInputCostMultiplier: 1.5,
|
||||||
|
LongContextOutputCostMultiplier: 1.25,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
})
|
||||||
|
|
||||||
|
pricing, err := svc.GetModelPricing("dynamic-tier-model")
|
||||||
|
require.NoError(t, err)
|
||||||
|
require.InDelta(t, 1e-6, pricing.InputPricePerToken, 1e-12)
|
||||||
|
require.InDelta(t, 2e-6, pricing.InputPricePerTokenPriority, 1e-12)
|
||||||
|
require.InDelta(t, 3e-6, pricing.OutputPricePerToken, 1e-12)
|
||||||
|
require.InDelta(t, 6e-6, pricing.OutputPricePerTokenPriority, 1e-12)
|
||||||
|
require.InDelta(t, 4e-6, pricing.CacheCreation5mPrice, 1e-12)
|
||||||
|
require.InDelta(t, 5e-6, pricing.CacheCreation1hPrice, 1e-12)
|
||||||
|
require.True(t, pricing.SupportsCacheBreakdown)
|
||||||
|
require.InDelta(t, 7e-7, pricing.CacheReadPricePerToken, 1e-12)
|
||||||
|
require.InDelta(t, 8e-7, pricing.CacheReadPricePerTokenPriority, 1e-12)
|
||||||
|
require.Equal(t, 999, pricing.LongContextInputThreshold)
|
||||||
|
require.InDelta(t, 1.5, pricing.LongContextInputMultiplier, 1e-12)
|
||||||
|
require.InDelta(t, 1.25, pricing.LongContextOutputMultiplier, 1e-12)
|
||||||
|
}
|
||||||
|
|||||||
@@ -334,3 +334,225 @@ func TestOpenAIGatewayServiceRecordUsage_ClampsActualInputTokensToZero(t *testin
|
|||||||
require.NotNil(t, usageRepo.lastLog)
|
require.NotNil(t, usageRepo.lastLog)
|
||||||
require.Equal(t, 0, usageRepo.lastLog.InputTokens)
|
require.Equal(t, 0, usageRepo.lastLog.InputTokens)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestOpenAIGatewayServiceRecordUsage_Gpt54LongContextBillsWholeSession(t *testing.T) {
|
||||||
|
usageRepo := &openAIRecordUsageLogRepoStub{inserted: true}
|
||||||
|
userRepo := &openAIRecordUsageUserRepoStub{}
|
||||||
|
subRepo := &openAIRecordUsageSubRepoStub{}
|
||||||
|
svc := newOpenAIRecordUsageServiceForTest(usageRepo, userRepo, subRepo, nil)
|
||||||
|
|
||||||
|
err := svc.RecordUsage(context.Background(), &OpenAIRecordUsageInput{
|
||||||
|
Result: &OpenAIForwardResult{
|
||||||
|
RequestID: "resp_gpt54_long_context",
|
||||||
|
Usage: OpenAIUsage{
|
||||||
|
InputTokens: 300000,
|
||||||
|
OutputTokens: 2000,
|
||||||
|
},
|
||||||
|
Model: "gpt-5.4-2026-03-05",
|
||||||
|
Duration: time.Second,
|
||||||
|
},
|
||||||
|
APIKey: &APIKey{ID: 1014},
|
||||||
|
User: &User{ID: 2014},
|
||||||
|
Account: &Account{ID: 3014},
|
||||||
|
})
|
||||||
|
|
||||||
|
require.NoError(t, err)
|
||||||
|
require.NotNil(t, usageRepo.lastLog)
|
||||||
|
|
||||||
|
expectedInput := 300000 * 2.5e-6 * 2.0
|
||||||
|
expectedOutput := 2000 * 15e-6 * 1.5
|
||||||
|
require.InDelta(t, expectedInput, usageRepo.lastLog.InputCost, 1e-10)
|
||||||
|
require.InDelta(t, expectedOutput, usageRepo.lastLog.OutputCost, 1e-10)
|
||||||
|
require.InDelta(t, expectedInput+expectedOutput, usageRepo.lastLog.TotalCost, 1e-10)
|
||||||
|
require.InDelta(t, (expectedInput+expectedOutput)*1.1, usageRepo.lastLog.ActualCost, 1e-10)
|
||||||
|
require.Equal(t, 1, userRepo.deductCalls)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestOpenAIGatewayServiceRecordUsage_ServiceTierPriorityUsesFastPricing(t *testing.T) {
|
||||||
|
usageRepo := &openAIRecordUsageLogRepoStub{inserted: true}
|
||||||
|
userRepo := &openAIRecordUsageUserRepoStub{}
|
||||||
|
subRepo := &openAIRecordUsageSubRepoStub{}
|
||||||
|
svc := newOpenAIRecordUsageServiceForTest(usageRepo, userRepo, subRepo, nil)
|
||||||
|
serviceTier := "priority"
|
||||||
|
usage := OpenAIUsage{InputTokens: 100, OutputTokens: 50}
|
||||||
|
|
||||||
|
err := svc.RecordUsage(context.Background(), &OpenAIRecordUsageInput{
|
||||||
|
Result: &OpenAIForwardResult{
|
||||||
|
RequestID: "resp_service_tier_priority",
|
||||||
|
ServiceTier: &serviceTier,
|
||||||
|
Usage: usage,
|
||||||
|
Model: "gpt-5.4",
|
||||||
|
Duration: time.Second,
|
||||||
|
},
|
||||||
|
APIKey: &APIKey{ID: 1015},
|
||||||
|
User: &User{ID: 2015},
|
||||||
|
Account: &Account{ID: 3015},
|
||||||
|
})
|
||||||
|
|
||||||
|
require.NoError(t, err)
|
||||||
|
require.NotNil(t, usageRepo.lastLog)
|
||||||
|
require.NotNil(t, usageRepo.lastLog.ServiceTier)
|
||||||
|
require.Equal(t, serviceTier, *usageRepo.lastLog.ServiceTier)
|
||||||
|
|
||||||
|
baseCost, calcErr := svc.billingService.CalculateCost("gpt-5.4", UsageTokens{InputTokens: 100, OutputTokens: 50}, 1.0)
|
||||||
|
require.NoError(t, calcErr)
|
||||||
|
require.InDelta(t, baseCost.TotalCost*2, usageRepo.lastLog.TotalCost, 1e-10)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestOpenAIGatewayServiceRecordUsage_ServiceTierFlexHalvesCost(t *testing.T) {
|
||||||
|
usageRepo := &openAIRecordUsageLogRepoStub{inserted: true}
|
||||||
|
userRepo := &openAIRecordUsageUserRepoStub{}
|
||||||
|
subRepo := &openAIRecordUsageSubRepoStub{}
|
||||||
|
svc := newOpenAIRecordUsageServiceForTest(usageRepo, userRepo, subRepo, nil)
|
||||||
|
serviceTier := "flex"
|
||||||
|
usage := OpenAIUsage{InputTokens: 100, OutputTokens: 50, CacheReadInputTokens: 20}
|
||||||
|
|
||||||
|
err := svc.RecordUsage(context.Background(), &OpenAIRecordUsageInput{
|
||||||
|
Result: &OpenAIForwardResult{
|
||||||
|
RequestID: "resp_service_tier_flex",
|
||||||
|
ServiceTier: &serviceTier,
|
||||||
|
Usage: usage,
|
||||||
|
Model: "gpt-5.4",
|
||||||
|
Duration: time.Second,
|
||||||
|
},
|
||||||
|
APIKey: &APIKey{ID: 1016},
|
||||||
|
User: &User{ID: 2016},
|
||||||
|
Account: &Account{ID: 3016},
|
||||||
|
})
|
||||||
|
|
||||||
|
require.NoError(t, err)
|
||||||
|
require.NotNil(t, usageRepo.lastLog)
|
||||||
|
|
||||||
|
baseCost, calcErr := svc.billingService.CalculateCost("gpt-5.4", UsageTokens{InputTokens: 80, OutputTokens: 50, CacheReadTokens: 20}, 1.0)
|
||||||
|
require.NoError(t, calcErr)
|
||||||
|
require.InDelta(t, baseCost.TotalCost*0.5, usageRepo.lastLog.TotalCost, 1e-10)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestNormalizeOpenAIServiceTier(t *testing.T) {
|
||||||
|
t.Run("fast maps to priority", func(t *testing.T) {
|
||||||
|
got := normalizeOpenAIServiceTier(" fast ")
|
||||||
|
require.NotNil(t, got)
|
||||||
|
require.Equal(t, "priority", *got)
|
||||||
|
})
|
||||||
|
|
||||||
|
t.Run("default ignored", func(t *testing.T) {
|
||||||
|
require.Nil(t, normalizeOpenAIServiceTier("default"))
|
||||||
|
})
|
||||||
|
|
||||||
|
t.Run("invalid ignored", func(t *testing.T) {
|
||||||
|
require.Nil(t, normalizeOpenAIServiceTier("turbo"))
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestExtractOpenAIServiceTier(t *testing.T) {
|
||||||
|
require.Equal(t, "priority", *extractOpenAIServiceTier(map[string]any{"service_tier": "fast"}))
|
||||||
|
require.Equal(t, "flex", *extractOpenAIServiceTier(map[string]any{"service_tier": "flex"}))
|
||||||
|
require.Nil(t, extractOpenAIServiceTier(map[string]any{"service_tier": 1}))
|
||||||
|
require.Nil(t, extractOpenAIServiceTier(nil))
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestExtractOpenAIServiceTierFromBody(t *testing.T) {
|
||||||
|
require.Equal(t, "priority", *extractOpenAIServiceTierFromBody([]byte(`{"service_tier":"fast"}`)))
|
||||||
|
require.Equal(t, "flex", *extractOpenAIServiceTierFromBody([]byte(`{"service_tier":"flex"}`)))
|
||||||
|
require.Nil(t, extractOpenAIServiceTierFromBody([]byte(`{"service_tier":"default"}`)))
|
||||||
|
require.Nil(t, extractOpenAIServiceTierFromBody(nil))
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestOpenAIGatewayServiceRecordUsage_UsesBillingModelAndMetadataFields(t *testing.T) {
|
||||||
|
usageRepo := &openAIRecordUsageLogRepoStub{inserted: true}
|
||||||
|
userRepo := &openAIRecordUsageUserRepoStub{}
|
||||||
|
subRepo := &openAIRecordUsageSubRepoStub{}
|
||||||
|
svc := newOpenAIRecordUsageServiceForTest(usageRepo, userRepo, subRepo, nil)
|
||||||
|
serviceTier := "priority"
|
||||||
|
reasoning := "high"
|
||||||
|
|
||||||
|
err := svc.RecordUsage(context.Background(), &OpenAIRecordUsageInput{
|
||||||
|
Result: &OpenAIForwardResult{
|
||||||
|
RequestID: "resp_billing_model_override",
|
||||||
|
BillingModel: "gpt-5.1-codex",
|
||||||
|
Model: "gpt-5.1",
|
||||||
|
ServiceTier: &serviceTier,
|
||||||
|
ReasoningEffort: &reasoning,
|
||||||
|
Usage: OpenAIUsage{
|
||||||
|
InputTokens: 20,
|
||||||
|
OutputTokens: 10,
|
||||||
|
},
|
||||||
|
Duration: 2 * time.Second,
|
||||||
|
FirstTokenMs: func() *int { v := 120; return &v }(),
|
||||||
|
},
|
||||||
|
APIKey: &APIKey{ID: 10, GroupID: i64p(11), Group: &Group{ID: 11, RateMultiplier: 1.2}},
|
||||||
|
User: &User{ID: 20},
|
||||||
|
Account: &Account{ID: 30},
|
||||||
|
UserAgent: "codex-cli/1.0",
|
||||||
|
IPAddress: "127.0.0.1",
|
||||||
|
})
|
||||||
|
|
||||||
|
require.NoError(t, err)
|
||||||
|
require.NotNil(t, usageRepo.lastLog)
|
||||||
|
require.Equal(t, "gpt-5.1-codex", usageRepo.lastLog.Model)
|
||||||
|
require.NotNil(t, usageRepo.lastLog.ServiceTier)
|
||||||
|
require.Equal(t, serviceTier, *usageRepo.lastLog.ServiceTier)
|
||||||
|
require.NotNil(t, usageRepo.lastLog.ReasoningEffort)
|
||||||
|
require.Equal(t, reasoning, *usageRepo.lastLog.ReasoningEffort)
|
||||||
|
require.NotNil(t, usageRepo.lastLog.UserAgent)
|
||||||
|
require.Equal(t, "codex-cli/1.0", *usageRepo.lastLog.UserAgent)
|
||||||
|
require.NotNil(t, usageRepo.lastLog.IPAddress)
|
||||||
|
require.Equal(t, "127.0.0.1", *usageRepo.lastLog.IPAddress)
|
||||||
|
require.NotNil(t, usageRepo.lastLog.GroupID)
|
||||||
|
require.Equal(t, int64(11), *usageRepo.lastLog.GroupID)
|
||||||
|
require.Equal(t, 1, userRepo.deductCalls)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestOpenAIGatewayServiceRecordUsage_SubscriptionBillingSetsSubscriptionFields(t *testing.T) {
|
||||||
|
usageRepo := &openAIRecordUsageLogRepoStub{inserted: true}
|
||||||
|
userRepo := &openAIRecordUsageUserRepoStub{}
|
||||||
|
subRepo := &openAIRecordUsageSubRepoStub{}
|
||||||
|
svc := newOpenAIRecordUsageServiceForTest(usageRepo, userRepo, subRepo, nil)
|
||||||
|
subscription := &UserSubscription{ID: 99}
|
||||||
|
|
||||||
|
err := svc.RecordUsage(context.Background(), &OpenAIRecordUsageInput{
|
||||||
|
Result: &OpenAIForwardResult{
|
||||||
|
RequestID: "resp_subscription_billing",
|
||||||
|
Usage: OpenAIUsage{InputTokens: 10, OutputTokens: 5},
|
||||||
|
Model: "gpt-5.1",
|
||||||
|
Duration: time.Second,
|
||||||
|
},
|
||||||
|
APIKey: &APIKey{ID: 100, GroupID: i64p(88), Group: &Group{ID: 88, SubscriptionType: SubscriptionTypeSubscription}},
|
||||||
|
User: &User{ID: 200},
|
||||||
|
Account: &Account{ID: 300},
|
||||||
|
Subscription: subscription,
|
||||||
|
})
|
||||||
|
|
||||||
|
require.NoError(t, err)
|
||||||
|
require.NotNil(t, usageRepo.lastLog)
|
||||||
|
require.Equal(t, BillingTypeSubscription, usageRepo.lastLog.BillingType)
|
||||||
|
require.NotNil(t, usageRepo.lastLog.SubscriptionID)
|
||||||
|
require.Equal(t, subscription.ID, *usageRepo.lastLog.SubscriptionID)
|
||||||
|
require.Equal(t, 1, subRepo.incrementCalls)
|
||||||
|
require.Equal(t, 0, userRepo.deductCalls)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestOpenAIGatewayServiceRecordUsage_SimpleModeSkipsBillingAfterPersist(t *testing.T) {
|
||||||
|
usageRepo := &openAIRecordUsageLogRepoStub{inserted: true}
|
||||||
|
userRepo := &openAIRecordUsageUserRepoStub{}
|
||||||
|
subRepo := &openAIRecordUsageSubRepoStub{}
|
||||||
|
svc := newOpenAIRecordUsageServiceForTest(usageRepo, userRepo, subRepo, nil)
|
||||||
|
svc.cfg.RunMode = config.RunModeSimple
|
||||||
|
|
||||||
|
err := svc.RecordUsage(context.Background(), &OpenAIRecordUsageInput{
|
||||||
|
Result: &OpenAIForwardResult{
|
||||||
|
RequestID: "resp_simple_mode",
|
||||||
|
Usage: OpenAIUsage{InputTokens: 10, OutputTokens: 5},
|
||||||
|
Model: "gpt-5.1",
|
||||||
|
Duration: time.Second,
|
||||||
|
},
|
||||||
|
APIKey: &APIKey{ID: 1000},
|
||||||
|
User: &User{ID: 2000},
|
||||||
|
Account: &Account{ID: 3000},
|
||||||
|
})
|
||||||
|
|
||||||
|
require.NoError(t, err)
|
||||||
|
require.Equal(t, 1, usageRepo.calls)
|
||||||
|
require.Equal(t, 0, userRepo.deductCalls)
|
||||||
|
require.Equal(t, 0, subRepo.incrementCalls)
|
||||||
|
}
|
||||||
|
|||||||
@@ -213,6 +213,9 @@ type OpenAIForwardResult struct {
|
|||||||
// This is set by the Anthropic Messages conversion path where
|
// This is set by the Anthropic Messages conversion path where
|
||||||
// the mapped upstream model differs from the client-facing model.
|
// the mapped upstream model differs from the client-facing model.
|
||||||
BillingModel string
|
BillingModel string
|
||||||
|
// ServiceTier records the OpenAI Responses API service tier, e.g. "priority" / "flex".
|
||||||
|
// Nil means the request did not specify a recognized tier.
|
||||||
|
ServiceTier *string
|
||||||
// ReasoningEffort is extracted from request body (reasoning.effort) or derived from model suffix.
|
// ReasoningEffort is extracted from request body (reasoning.effort) or derived from model suffix.
|
||||||
// Stored for usage records display; nil means not provided / not applicable.
|
// Stored for usage records display; nil means not provided / not applicable.
|
||||||
ReasoningEffort *string
|
ReasoningEffort *string
|
||||||
@@ -2078,11 +2081,13 @@ func (s *OpenAIGatewayService) Forward(ctx context.Context, c *gin.Context, acco
|
|||||||
}
|
}
|
||||||
|
|
||||||
reasoningEffort := extractOpenAIReasoningEffort(reqBody, originalModel)
|
reasoningEffort := extractOpenAIReasoningEffort(reqBody, originalModel)
|
||||||
|
serviceTier := extractOpenAIServiceTier(reqBody)
|
||||||
|
|
||||||
return &OpenAIForwardResult{
|
return &OpenAIForwardResult{
|
||||||
RequestID: resp.Header.Get("x-request-id"),
|
RequestID: resp.Header.Get("x-request-id"),
|
||||||
Usage: *usage,
|
Usage: *usage,
|
||||||
Model: originalModel,
|
Model: originalModel,
|
||||||
|
ServiceTier: serviceTier,
|
||||||
ReasoningEffort: reasoningEffort,
|
ReasoningEffort: reasoningEffort,
|
||||||
Stream: reqStream,
|
Stream: reqStream,
|
||||||
OpenAIWSMode: false,
|
OpenAIWSMode: false,
|
||||||
@@ -2237,6 +2242,7 @@ func (s *OpenAIGatewayService) forwardOpenAIPassthrough(
|
|||||||
RequestID: resp.Header.Get("x-request-id"),
|
RequestID: resp.Header.Get("x-request-id"),
|
||||||
Usage: *usage,
|
Usage: *usage,
|
||||||
Model: reqModel,
|
Model: reqModel,
|
||||||
|
ServiceTier: extractOpenAIServiceTierFromBody(body),
|
||||||
ReasoningEffort: reasoningEffort,
|
ReasoningEffort: reasoningEffort,
|
||||||
Stream: reqStream,
|
Stream: reqStream,
|
||||||
OpenAIWSMode: false,
|
OpenAIWSMode: false,
|
||||||
@@ -3674,7 +3680,11 @@ func (s *OpenAIGatewayService) RecordUsage(ctx context.Context, input *OpenAIRec
|
|||||||
if result.BillingModel != "" {
|
if result.BillingModel != "" {
|
||||||
billingModel = result.BillingModel
|
billingModel = result.BillingModel
|
||||||
}
|
}
|
||||||
cost, err := s.billingService.CalculateCost(billingModel, tokens, multiplier)
|
serviceTier := ""
|
||||||
|
if result.ServiceTier != nil {
|
||||||
|
serviceTier = strings.TrimSpace(*result.ServiceTier)
|
||||||
|
}
|
||||||
|
cost, err := s.billingService.CalculateCostWithServiceTier(billingModel, tokens, multiplier, serviceTier)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
cost = &CostBreakdown{ActualCost: 0}
|
cost = &CostBreakdown{ActualCost: 0}
|
||||||
}
|
}
|
||||||
@@ -3695,6 +3705,7 @@ func (s *OpenAIGatewayService) RecordUsage(ctx context.Context, input *OpenAIRec
|
|||||||
AccountID: account.ID,
|
AccountID: account.ID,
|
||||||
RequestID: result.RequestID,
|
RequestID: result.RequestID,
|
||||||
Model: billingModel,
|
Model: billingModel,
|
||||||
|
ServiceTier: result.ServiceTier,
|
||||||
ReasoningEffort: result.ReasoningEffort,
|
ReasoningEffort: result.ReasoningEffort,
|
||||||
InputTokens: actualInputTokens,
|
InputTokens: actualInputTokens,
|
||||||
OutputTokens: result.Usage.OutputTokens,
|
OutputTokens: result.Usage.OutputTokens,
|
||||||
@@ -4162,6 +4173,40 @@ func extractOpenAIReasoningEffortFromBody(body []byte, requestedModel string) *s
|
|||||||
return &value
|
return &value
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func extractOpenAIServiceTier(reqBody map[string]any) *string {
|
||||||
|
if reqBody == nil {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
raw, ok := reqBody["service_tier"].(string)
|
||||||
|
if !ok {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
return normalizeOpenAIServiceTier(raw)
|
||||||
|
}
|
||||||
|
|
||||||
|
func extractOpenAIServiceTierFromBody(body []byte) *string {
|
||||||
|
if len(body) == 0 {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
return normalizeOpenAIServiceTier(gjson.GetBytes(body, "service_tier").String())
|
||||||
|
}
|
||||||
|
|
||||||
|
func normalizeOpenAIServiceTier(raw string) *string {
|
||||||
|
value := strings.ToLower(strings.TrimSpace(raw))
|
||||||
|
if value == "" {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
if value == "fast" {
|
||||||
|
value = "priority"
|
||||||
|
}
|
||||||
|
switch value {
|
||||||
|
case "priority", "flex":
|
||||||
|
return &value
|
||||||
|
default:
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func getOpenAIRequestBodyMap(c *gin.Context, body []byte) (map[string]any, error) {
|
func getOpenAIRequestBodyMap(c *gin.Context, body []byte) (map[string]any, error) {
|
||||||
if c != nil {
|
if c != nil {
|
||||||
if cached, ok := c.Get(OpenAIParsedRequestBodyKey); ok {
|
if cached, ok := c.Get(OpenAIParsedRequestBodyKey); ok {
|
||||||
|
|||||||
@@ -671,7 +671,7 @@ func TestOpenAIGatewayService_OAuthPassthrough_StreamingSetsFirstTokenMs(t *test
|
|||||||
c.Request = httptest.NewRequest(http.MethodPost, "/v1/responses", bytes.NewReader(nil))
|
c.Request = httptest.NewRequest(http.MethodPost, "/v1/responses", bytes.NewReader(nil))
|
||||||
c.Request.Header.Set("User-Agent", "codex_cli_rs/0.1.0")
|
c.Request.Header.Set("User-Agent", "codex_cli_rs/0.1.0")
|
||||||
|
|
||||||
originalBody := []byte(`{"model":"gpt-5.2","stream":true,"input":[{"type":"text","text":"hi"}]}`)
|
originalBody := []byte(`{"model":"gpt-5.2","stream":true,"service_tier":"fast","input":[{"type":"text","text":"hi"}]}`)
|
||||||
|
|
||||||
upstreamSSE := strings.Join([]string{
|
upstreamSSE := strings.Join([]string{
|
||||||
`data: {"type":"response.output_text.delta","delta":"h"}`,
|
`data: {"type":"response.output_text.delta","delta":"h"}`,
|
||||||
@@ -711,6 +711,8 @@ func TestOpenAIGatewayService_OAuthPassthrough_StreamingSetsFirstTokenMs(t *test
|
|||||||
require.GreaterOrEqual(t, time.Since(start), time.Duration(0))
|
require.GreaterOrEqual(t, time.Since(start), time.Duration(0))
|
||||||
require.NotNil(t, result.FirstTokenMs)
|
require.NotNil(t, result.FirstTokenMs)
|
||||||
require.GreaterOrEqual(t, *result.FirstTokenMs, 0)
|
require.GreaterOrEqual(t, *result.FirstTokenMs, 0)
|
||||||
|
require.NotNil(t, result.ServiceTier)
|
||||||
|
require.Equal(t, "priority", *result.ServiceTier)
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestOpenAIGatewayService_OAuthPassthrough_StreamClientDisconnectStillCollectsUsage(t *testing.T) {
|
func TestOpenAIGatewayService_OAuthPassthrough_StreamClientDisconnectStillCollectsUsage(t *testing.T) {
|
||||||
@@ -777,7 +779,7 @@ func TestOpenAIGatewayService_APIKeyPassthrough_PreservesBodyAndUsesResponsesEnd
|
|||||||
c.Request.Header.Set("User-Agent", "curl/8.0")
|
c.Request.Header.Set("User-Agent", "curl/8.0")
|
||||||
c.Request.Header.Set("X-Test", "keep")
|
c.Request.Header.Set("X-Test", "keep")
|
||||||
|
|
||||||
originalBody := []byte(`{"model":"gpt-5.2","stream":false,"max_output_tokens":128,"input":[{"type":"text","text":"hi"}]}`)
|
originalBody := []byte(`{"model":"gpt-5.2","stream":false,"service_tier":"flex","max_output_tokens":128,"input":[{"type":"text","text":"hi"}]}`)
|
||||||
resp := &http.Response{
|
resp := &http.Response{
|
||||||
StatusCode: http.StatusOK,
|
StatusCode: http.StatusOK,
|
||||||
Header: http.Header{"Content-Type": []string{"application/json"}, "x-request-id": []string{"rid"}},
|
Header: http.Header{"Content-Type": []string{"application/json"}, "x-request-id": []string{"rid"}},
|
||||||
@@ -803,8 +805,11 @@ func TestOpenAIGatewayService_APIKeyPassthrough_PreservesBodyAndUsesResponsesEnd
|
|||||||
RateMultiplier: f64p(1),
|
RateMultiplier: f64p(1),
|
||||||
}
|
}
|
||||||
|
|
||||||
_, err := svc.Forward(context.Background(), c, account, originalBody)
|
result, err := svc.Forward(context.Background(), c, account, originalBody)
|
||||||
require.NoError(t, err)
|
require.NoError(t, err)
|
||||||
|
require.NotNil(t, result)
|
||||||
|
require.NotNil(t, result.ServiceTier)
|
||||||
|
require.Equal(t, "flex", *result.ServiceTier)
|
||||||
require.NotNil(t, upstream.lastReq)
|
require.NotNil(t, upstream.lastReq)
|
||||||
require.Equal(t, originalBody, upstream.lastBody)
|
require.Equal(t, originalBody, upstream.lastBody)
|
||||||
require.Equal(t, "https://api.openai.com/v1/responses", upstream.lastReq.URL.String())
|
require.Equal(t, "https://api.openai.com/v1/responses", upstream.lastReq.URL.String())
|
||||||
|
|||||||
@@ -2307,6 +2307,7 @@ func (s *OpenAIGatewayService) forwardOpenAIWSV2(
|
|||||||
RequestID: responseID,
|
RequestID: responseID,
|
||||||
Usage: *usage,
|
Usage: *usage,
|
||||||
Model: originalModel,
|
Model: originalModel,
|
||||||
|
ServiceTier: extractOpenAIServiceTier(reqBody),
|
||||||
ReasoningEffort: extractOpenAIReasoningEffort(reqBody, originalModel),
|
ReasoningEffort: extractOpenAIReasoningEffort(reqBody, originalModel),
|
||||||
Stream: reqStream,
|
Stream: reqStream,
|
||||||
OpenAIWSMode: true,
|
OpenAIWSMode: true,
|
||||||
@@ -2923,6 +2924,7 @@ func (s *OpenAIGatewayService) ProxyResponsesWebSocketFromClient(
|
|||||||
RequestID: responseID,
|
RequestID: responseID,
|
||||||
Usage: usage,
|
Usage: usage,
|
||||||
Model: originalModel,
|
Model: originalModel,
|
||||||
|
ServiceTier: extractOpenAIServiceTierFromBody(payload),
|
||||||
ReasoningEffort: extractOpenAIReasoningEffortFromBody(payload, originalModel),
|
ReasoningEffort: extractOpenAIReasoningEffortFromBody(payload, originalModel),
|
||||||
Stream: reqStream,
|
Stream: reqStream,
|
||||||
OpenAIWSMode: true,
|
OpenAIWSMode: true,
|
||||||
|
|||||||
@@ -399,7 +399,7 @@ func TestOpenAIGatewayService_ProxyResponsesWebSocketFromClient_PassthroughModeR
|
|||||||
}()
|
}()
|
||||||
|
|
||||||
writeCtx, cancelWrite := context.WithTimeout(context.Background(), 3*time.Second)
|
writeCtx, cancelWrite := context.WithTimeout(context.Background(), 3*time.Second)
|
||||||
err = clientConn.Write(writeCtx, coderws.MessageText, []byte(`{"type":"response.create","model":"gpt-5.1","stream":false}`))
|
err = clientConn.Write(writeCtx, coderws.MessageText, []byte(`{"type":"response.create","model":"gpt-5.1","stream":false,"service_tier":"fast"}`))
|
||||||
cancelWrite()
|
cancelWrite()
|
||||||
require.NoError(t, err)
|
require.NoError(t, err)
|
||||||
|
|
||||||
@@ -424,6 +424,8 @@ func TestOpenAIGatewayService_ProxyResponsesWebSocketFromClient_PassthroughModeR
|
|||||||
require.True(t, result.OpenAIWSMode)
|
require.True(t, result.OpenAIWSMode)
|
||||||
require.Equal(t, 2, result.Usage.InputTokens)
|
require.Equal(t, 2, result.Usage.InputTokens)
|
||||||
require.Equal(t, 3, result.Usage.OutputTokens)
|
require.Equal(t, 3, result.Usage.OutputTokens)
|
||||||
|
require.NotNil(t, result.ServiceTier)
|
||||||
|
require.Equal(t, "priority", *result.ServiceTier)
|
||||||
case <-time.After(2 * time.Second):
|
case <-time.After(2 * time.Second):
|
||||||
t.Fatal("未收到 passthrough turn 结果回调")
|
t.Fatal("未收到 passthrough turn 结果回调")
|
||||||
}
|
}
|
||||||
@@ -2593,7 +2595,7 @@ func TestOpenAIGatewayService_ProxyResponsesWebSocketFromClient_ClientDisconnect
|
|||||||
require.NoError(t, err)
|
require.NoError(t, err)
|
||||||
|
|
||||||
writeCtx, cancelWrite := context.WithTimeout(context.Background(), 3*time.Second)
|
writeCtx, cancelWrite := context.WithTimeout(context.Background(), 3*time.Second)
|
||||||
err = clientConn.Write(writeCtx, coderws.MessageText, []byte(`{"type":"response.create","model":"custom-original-model","stream":false}`))
|
err = clientConn.Write(writeCtx, coderws.MessageText, []byte(`{"type":"response.create","model":"custom-original-model","stream":false,"service_tier":"flex"}`))
|
||||||
cancelWrite()
|
cancelWrite()
|
||||||
require.NoError(t, err)
|
require.NoError(t, err)
|
||||||
// 立即关闭客户端,模拟客户端在 relay 期间断连。
|
// 立即关闭客户端,模拟客户端在 relay 期间断连。
|
||||||
@@ -2611,6 +2613,8 @@ func TestOpenAIGatewayService_ProxyResponsesWebSocketFromClient_ClientDisconnect
|
|||||||
require.Equal(t, "resp_ingress_disconnect", result.RequestID)
|
require.Equal(t, "resp_ingress_disconnect", result.RequestID)
|
||||||
require.Equal(t, 2, result.Usage.InputTokens)
|
require.Equal(t, 2, result.Usage.InputTokens)
|
||||||
require.Equal(t, 1, result.Usage.OutputTokens)
|
require.Equal(t, 1, result.Usage.OutputTokens)
|
||||||
|
require.NotNil(t, result.ServiceTier)
|
||||||
|
require.Equal(t, "flex", *result.ServiceTier)
|
||||||
case <-time.After(2 * time.Second):
|
case <-time.After(2 * time.Second):
|
||||||
t.Fatal("未收到断连后的 turn 结果回调")
|
t.Fatal("未收到断连后的 turn 结果回调")
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -77,6 +77,7 @@ func (s *OpenAIGatewayService) proxyResponsesWebSocketV2Passthrough(
|
|||||||
return errors.New("token is empty")
|
return errors.New("token is empty")
|
||||||
}
|
}
|
||||||
requestModel := strings.TrimSpace(gjson.GetBytes(firstClientMessage, "model").String())
|
requestModel := strings.TrimSpace(gjson.GetBytes(firstClientMessage, "model").String())
|
||||||
|
requestServiceTier := extractOpenAIServiceTierFromBody(firstClientMessage)
|
||||||
requestPreviousResponseID := strings.TrimSpace(gjson.GetBytes(firstClientMessage, "previous_response_id").String())
|
requestPreviousResponseID := strings.TrimSpace(gjson.GetBytes(firstClientMessage, "previous_response_id").String())
|
||||||
logOpenAIWSV2Passthrough(
|
logOpenAIWSV2Passthrough(
|
||||||
"relay_start account_id=%d model=%s previous_response_id=%s first_message_type=%s first_message_bytes=%d",
|
"relay_start account_id=%d model=%s previous_response_id=%s first_message_type=%s first_message_bytes=%d",
|
||||||
@@ -178,6 +179,7 @@ func (s *OpenAIGatewayService) proxyResponsesWebSocketV2Passthrough(
|
|||||||
CacheReadInputTokens: turn.Usage.CacheReadInputTokens,
|
CacheReadInputTokens: turn.Usage.CacheReadInputTokens,
|
||||||
},
|
},
|
||||||
Model: turn.RequestModel,
|
Model: turn.RequestModel,
|
||||||
|
ServiceTier: requestServiceTier,
|
||||||
Stream: true,
|
Stream: true,
|
||||||
OpenAIWSMode: true,
|
OpenAIWSMode: true,
|
||||||
ResponseHeaders: cloneHeader(handshakeHeaders),
|
ResponseHeaders: cloneHeader(handshakeHeaders),
|
||||||
@@ -225,6 +227,7 @@ func (s *OpenAIGatewayService) proxyResponsesWebSocketV2Passthrough(
|
|||||||
CacheReadInputTokens: relayResult.Usage.CacheReadInputTokens,
|
CacheReadInputTokens: relayResult.Usage.CacheReadInputTokens,
|
||||||
},
|
},
|
||||||
Model: relayResult.RequestModel,
|
Model: relayResult.RequestModel,
|
||||||
|
ServiceTier: requestServiceTier,
|
||||||
Stream: true,
|
Stream: true,
|
||||||
OpenAIWSMode: true,
|
OpenAIWSMode: true,
|
||||||
ResponseHeaders: cloneHeader(handshakeHeaders),
|
ResponseHeaders: cloneHeader(handshakeHeaders),
|
||||||
|
|||||||
@@ -40,13 +40,17 @@ var (
|
|||||||
// 只保留我们需要的字段,使用指针来处理可能缺失的值
|
// 只保留我们需要的字段,使用指针来处理可能缺失的值
|
||||||
type LiteLLMModelPricing struct {
|
type LiteLLMModelPricing struct {
|
||||||
InputCostPerToken float64 `json:"input_cost_per_token"`
|
InputCostPerToken float64 `json:"input_cost_per_token"`
|
||||||
|
InputCostPerTokenPriority float64 `json:"input_cost_per_token_priority"`
|
||||||
OutputCostPerToken float64 `json:"output_cost_per_token"`
|
OutputCostPerToken float64 `json:"output_cost_per_token"`
|
||||||
|
OutputCostPerTokenPriority float64 `json:"output_cost_per_token_priority"`
|
||||||
CacheCreationInputTokenCost float64 `json:"cache_creation_input_token_cost"`
|
CacheCreationInputTokenCost float64 `json:"cache_creation_input_token_cost"`
|
||||||
CacheCreationInputTokenCostAbove1hr float64 `json:"cache_creation_input_token_cost_above_1hr"`
|
CacheCreationInputTokenCostAbove1hr float64 `json:"cache_creation_input_token_cost_above_1hr"`
|
||||||
CacheReadInputTokenCost float64 `json:"cache_read_input_token_cost"`
|
CacheReadInputTokenCost float64 `json:"cache_read_input_token_cost"`
|
||||||
|
CacheReadInputTokenCostPriority float64 `json:"cache_read_input_token_cost_priority"`
|
||||||
LongContextInputTokenThreshold int `json:"long_context_input_token_threshold,omitempty"`
|
LongContextInputTokenThreshold int `json:"long_context_input_token_threshold,omitempty"`
|
||||||
LongContextInputCostMultiplier float64 `json:"long_context_input_cost_multiplier,omitempty"`
|
LongContextInputCostMultiplier float64 `json:"long_context_input_cost_multiplier,omitempty"`
|
||||||
LongContextOutputCostMultiplier float64 `json:"long_context_output_cost_multiplier,omitempty"`
|
LongContextOutputCostMultiplier float64 `json:"long_context_output_cost_multiplier,omitempty"`
|
||||||
|
SupportsServiceTier bool `json:"supports_service_tier"`
|
||||||
LiteLLMProvider string `json:"litellm_provider"`
|
LiteLLMProvider string `json:"litellm_provider"`
|
||||||
Mode string `json:"mode"`
|
Mode string `json:"mode"`
|
||||||
SupportsPromptCaching bool `json:"supports_prompt_caching"`
|
SupportsPromptCaching bool `json:"supports_prompt_caching"`
|
||||||
@@ -62,10 +66,14 @@ type PricingRemoteClient interface {
|
|||||||
// LiteLLMRawEntry 用于解析原始JSON数据
|
// LiteLLMRawEntry 用于解析原始JSON数据
|
||||||
type LiteLLMRawEntry struct {
|
type LiteLLMRawEntry struct {
|
||||||
InputCostPerToken *float64 `json:"input_cost_per_token"`
|
InputCostPerToken *float64 `json:"input_cost_per_token"`
|
||||||
|
InputCostPerTokenPriority *float64 `json:"input_cost_per_token_priority"`
|
||||||
OutputCostPerToken *float64 `json:"output_cost_per_token"`
|
OutputCostPerToken *float64 `json:"output_cost_per_token"`
|
||||||
|
OutputCostPerTokenPriority *float64 `json:"output_cost_per_token_priority"`
|
||||||
CacheCreationInputTokenCost *float64 `json:"cache_creation_input_token_cost"`
|
CacheCreationInputTokenCost *float64 `json:"cache_creation_input_token_cost"`
|
||||||
CacheCreationInputTokenCostAbove1hr *float64 `json:"cache_creation_input_token_cost_above_1hr"`
|
CacheCreationInputTokenCostAbove1hr *float64 `json:"cache_creation_input_token_cost_above_1hr"`
|
||||||
CacheReadInputTokenCost *float64 `json:"cache_read_input_token_cost"`
|
CacheReadInputTokenCost *float64 `json:"cache_read_input_token_cost"`
|
||||||
|
CacheReadInputTokenCostPriority *float64 `json:"cache_read_input_token_cost_priority"`
|
||||||
|
SupportsServiceTier bool `json:"supports_service_tier"`
|
||||||
LiteLLMProvider string `json:"litellm_provider"`
|
LiteLLMProvider string `json:"litellm_provider"`
|
||||||
Mode string `json:"mode"`
|
Mode string `json:"mode"`
|
||||||
SupportsPromptCaching bool `json:"supports_prompt_caching"`
|
SupportsPromptCaching bool `json:"supports_prompt_caching"`
|
||||||
@@ -324,14 +332,21 @@ func (s *PricingService) parsePricingData(body []byte) (map[string]*LiteLLMModel
|
|||||||
LiteLLMProvider: entry.LiteLLMProvider,
|
LiteLLMProvider: entry.LiteLLMProvider,
|
||||||
Mode: entry.Mode,
|
Mode: entry.Mode,
|
||||||
SupportsPromptCaching: entry.SupportsPromptCaching,
|
SupportsPromptCaching: entry.SupportsPromptCaching,
|
||||||
|
SupportsServiceTier: entry.SupportsServiceTier,
|
||||||
}
|
}
|
||||||
|
|
||||||
if entry.InputCostPerToken != nil {
|
if entry.InputCostPerToken != nil {
|
||||||
pricing.InputCostPerToken = *entry.InputCostPerToken
|
pricing.InputCostPerToken = *entry.InputCostPerToken
|
||||||
}
|
}
|
||||||
|
if entry.InputCostPerTokenPriority != nil {
|
||||||
|
pricing.InputCostPerTokenPriority = *entry.InputCostPerTokenPriority
|
||||||
|
}
|
||||||
if entry.OutputCostPerToken != nil {
|
if entry.OutputCostPerToken != nil {
|
||||||
pricing.OutputCostPerToken = *entry.OutputCostPerToken
|
pricing.OutputCostPerToken = *entry.OutputCostPerToken
|
||||||
}
|
}
|
||||||
|
if entry.OutputCostPerTokenPriority != nil {
|
||||||
|
pricing.OutputCostPerTokenPriority = *entry.OutputCostPerTokenPriority
|
||||||
|
}
|
||||||
if entry.CacheCreationInputTokenCost != nil {
|
if entry.CacheCreationInputTokenCost != nil {
|
||||||
pricing.CacheCreationInputTokenCost = *entry.CacheCreationInputTokenCost
|
pricing.CacheCreationInputTokenCost = *entry.CacheCreationInputTokenCost
|
||||||
}
|
}
|
||||||
@@ -341,6 +356,9 @@ func (s *PricingService) parsePricingData(body []byte) (map[string]*LiteLLMModel
|
|||||||
if entry.CacheReadInputTokenCost != nil {
|
if entry.CacheReadInputTokenCost != nil {
|
||||||
pricing.CacheReadInputTokenCost = *entry.CacheReadInputTokenCost
|
pricing.CacheReadInputTokenCost = *entry.CacheReadInputTokenCost
|
||||||
}
|
}
|
||||||
|
if entry.CacheReadInputTokenCostPriority != nil {
|
||||||
|
pricing.CacheReadInputTokenCostPriority = *entry.CacheReadInputTokenCostPriority
|
||||||
|
}
|
||||||
if entry.OutputCostPerImage != nil {
|
if entry.OutputCostPerImage != nil {
|
||||||
pricing.OutputCostPerImage = *entry.OutputCostPerImage
|
pricing.OutputCostPerImage = *entry.OutputCostPerImage
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,11 +1,40 @@
|
|||||||
package service
|
package service
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"encoding/json"
|
||||||
"testing"
|
"testing"
|
||||||
|
|
||||||
"github.com/stretchr/testify/require"
|
"github.com/stretchr/testify/require"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
func TestParsePricingData_ParsesPriorityAndServiceTierFields(t *testing.T) {
|
||||||
|
svc := &PricingService{}
|
||||||
|
body := []byte(`{
|
||||||
|
"gpt-5.4": {
|
||||||
|
"input_cost_per_token": 0.0000025,
|
||||||
|
"input_cost_per_token_priority": 0.000005,
|
||||||
|
"output_cost_per_token": 0.000015,
|
||||||
|
"output_cost_per_token_priority": 0.00003,
|
||||||
|
"cache_creation_input_token_cost": 0.0000025,
|
||||||
|
"cache_read_input_token_cost": 0.00000025,
|
||||||
|
"cache_read_input_token_cost_priority": 0.0000005,
|
||||||
|
"supports_service_tier": true,
|
||||||
|
"supports_prompt_caching": true,
|
||||||
|
"litellm_provider": "openai",
|
||||||
|
"mode": "chat"
|
||||||
|
}
|
||||||
|
}`)
|
||||||
|
|
||||||
|
data, err := svc.parsePricingData(body)
|
||||||
|
require.NoError(t, err)
|
||||||
|
pricing := data["gpt-5.4"]
|
||||||
|
require.NotNil(t, pricing)
|
||||||
|
require.InDelta(t, 5e-6, pricing.InputCostPerTokenPriority, 1e-12)
|
||||||
|
require.InDelta(t, 3e-5, pricing.OutputCostPerTokenPriority, 1e-12)
|
||||||
|
require.InDelta(t, 5e-7, pricing.CacheReadInputTokenCostPriority, 1e-12)
|
||||||
|
require.True(t, pricing.SupportsServiceTier)
|
||||||
|
}
|
||||||
|
|
||||||
func TestGetModelPricing_Gpt53CodexSparkUsesGpt51CodexPricing(t *testing.T) {
|
func TestGetModelPricing_Gpt53CodexSparkUsesGpt51CodexPricing(t *testing.T) {
|
||||||
sparkPricing := &LiteLLMModelPricing{InputCostPerToken: 1}
|
sparkPricing := &LiteLLMModelPricing{InputCostPerToken: 1}
|
||||||
gpt53Pricing := &LiteLLMModelPricing{InputCostPerToken: 9}
|
gpt53Pricing := &LiteLLMModelPricing{InputCostPerToken: 9}
|
||||||
@@ -68,3 +97,64 @@ func TestGetModelPricing_Gpt54UsesStaticFallbackWhenRemoteMissing(t *testing.T)
|
|||||||
require.InDelta(t, 2.0, got.LongContextInputCostMultiplier, 1e-12)
|
require.InDelta(t, 2.0, got.LongContextInputCostMultiplier, 1e-12)
|
||||||
require.InDelta(t, 1.5, got.LongContextOutputCostMultiplier, 1e-12)
|
require.InDelta(t, 1.5, got.LongContextOutputCostMultiplier, 1e-12)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestParsePricingData_PreservesPriorityAndServiceTierFields(t *testing.T) {
|
||||||
|
raw := map[string]any{
|
||||||
|
"gpt-5.4": map[string]any{
|
||||||
|
"input_cost_per_token": 2.5e-6,
|
||||||
|
"input_cost_per_token_priority": 5e-6,
|
||||||
|
"output_cost_per_token": 15e-6,
|
||||||
|
"output_cost_per_token_priority": 30e-6,
|
||||||
|
"cache_read_input_token_cost": 0.25e-6,
|
||||||
|
"cache_read_input_token_cost_priority": 0.5e-6,
|
||||||
|
"supports_service_tier": true,
|
||||||
|
"supports_prompt_caching": true,
|
||||||
|
"litellm_provider": "openai",
|
||||||
|
"mode": "chat",
|
||||||
|
},
|
||||||
|
}
|
||||||
|
body, err := json.Marshal(raw)
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
svc := &PricingService{}
|
||||||
|
pricingMap, err := svc.parsePricingData(body)
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
pricing := pricingMap["gpt-5.4"]
|
||||||
|
require.NotNil(t, pricing)
|
||||||
|
require.InDelta(t, 2.5e-6, pricing.InputCostPerToken, 1e-12)
|
||||||
|
require.InDelta(t, 5e-6, pricing.InputCostPerTokenPriority, 1e-12)
|
||||||
|
require.InDelta(t, 15e-6, pricing.OutputCostPerToken, 1e-12)
|
||||||
|
require.InDelta(t, 30e-6, pricing.OutputCostPerTokenPriority, 1e-12)
|
||||||
|
require.InDelta(t, 0.25e-6, pricing.CacheReadInputTokenCost, 1e-12)
|
||||||
|
require.InDelta(t, 0.5e-6, pricing.CacheReadInputTokenCostPriority, 1e-12)
|
||||||
|
require.True(t, pricing.SupportsServiceTier)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestParsePricingData_PreservesServiceTierPriorityFields(t *testing.T) {
|
||||||
|
svc := &PricingService{}
|
||||||
|
pricingData, err := svc.parsePricingData([]byte(`{
|
||||||
|
"gpt-5.4": {
|
||||||
|
"input_cost_per_token": 0.0000025,
|
||||||
|
"input_cost_per_token_priority": 0.000005,
|
||||||
|
"output_cost_per_token": 0.000015,
|
||||||
|
"output_cost_per_token_priority": 0.00003,
|
||||||
|
"cache_read_input_token_cost": 0.00000025,
|
||||||
|
"cache_read_input_token_cost_priority": 0.0000005,
|
||||||
|
"supports_service_tier": true,
|
||||||
|
"litellm_provider": "openai",
|
||||||
|
"mode": "chat"
|
||||||
|
}
|
||||||
|
}`))
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
pricing := pricingData["gpt-5.4"]
|
||||||
|
require.NotNil(t, pricing)
|
||||||
|
require.InDelta(t, 0.0000025, pricing.InputCostPerToken, 1e-12)
|
||||||
|
require.InDelta(t, 0.000005, pricing.InputCostPerTokenPriority, 1e-12)
|
||||||
|
require.InDelta(t, 0.000015, pricing.OutputCostPerToken, 1e-12)
|
||||||
|
require.InDelta(t, 0.00003, pricing.OutputCostPerTokenPriority, 1e-12)
|
||||||
|
require.InDelta(t, 0.00000025, pricing.CacheReadInputTokenCost, 1e-12)
|
||||||
|
require.InDelta(t, 0.0000005, pricing.CacheReadInputTokenCostPriority, 1e-12)
|
||||||
|
require.True(t, pricing.SupportsServiceTier)
|
||||||
|
}
|
||||||
|
|||||||
@@ -98,6 +98,8 @@ type UsageLog struct {
|
|||||||
AccountID int64
|
AccountID int64
|
||||||
RequestID string
|
RequestID string
|
||||||
Model string
|
Model string
|
||||||
|
// ServiceTier records the OpenAI service tier used for billing, e.g. "priority" / "flex".
|
||||||
|
ServiceTier *string
|
||||||
// ReasoningEffort is the request's reasoning effort level (OpenAI Responses API),
|
// ReasoningEffort is the request's reasoning effort level (OpenAI Responses API),
|
||||||
// e.g. "low" / "medium" / "high" / "xhigh". Nil means not provided / not applicable.
|
// e.g. "low" / "medium" / "high" / "xhigh". Nil means not provided / not applicable.
|
||||||
ReasoningEffort *string
|
ReasoningEffort *string
|
||||||
|
|||||||
5
backend/migrations/070_add_usage_log_service_tier.sql
Normal file
5
backend/migrations/070_add_usage_log_service_tier.sql
Normal file
@@ -0,0 +1,5 @@
|
|||||||
|
ALTER TABLE usage_logs
|
||||||
|
ADD COLUMN IF NOT EXISTS service_tier VARCHAR(16);
|
||||||
|
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_usage_logs_service_tier_created_at
|
||||||
|
ON usage_logs (service_tier, created_at);
|
||||||
@@ -236,6 +236,14 @@
|
|||||||
<span class="text-gray-400">{{ t('admin.usage.outputCost') }}</span>
|
<span class="text-gray-400">{{ t('admin.usage.outputCost') }}</span>
|
||||||
<span class="font-medium text-white">${{ tooltipData.output_cost.toFixed(6) }}</span>
|
<span class="font-medium text-white">${{ tooltipData.output_cost.toFixed(6) }}</span>
|
||||||
</div>
|
</div>
|
||||||
|
<div v-if="tooltipData && tooltipData.input_tokens > 0" class="flex items-center justify-between gap-4">
|
||||||
|
<span class="text-gray-400">{{ t('usage.inputTokenPrice') }}</span>
|
||||||
|
<span class="font-medium text-sky-300">{{ formatTokenPricePerMillion(tooltipData.input_cost, tooltipData.input_tokens) }} {{ t('usage.perMillionTokens') }}</span>
|
||||||
|
</div>
|
||||||
|
<div v-if="tooltipData && tooltipData.output_tokens > 0" class="flex items-center justify-between gap-4">
|
||||||
|
<span class="text-gray-400">{{ t('usage.outputTokenPrice') }}</span>
|
||||||
|
<span class="font-medium text-violet-300">{{ formatTokenPricePerMillion(tooltipData.output_cost, tooltipData.output_tokens) }} {{ t('usage.perMillionTokens') }}</span>
|
||||||
|
</div>
|
||||||
<div v-if="tooltipData && tooltipData.cache_creation_cost > 0" class="flex items-center justify-between gap-4">
|
<div v-if="tooltipData && tooltipData.cache_creation_cost > 0" class="flex items-center justify-between gap-4">
|
||||||
<span class="text-gray-400">{{ t('admin.usage.cacheCreationCost') }}</span>
|
<span class="text-gray-400">{{ t('admin.usage.cacheCreationCost') }}</span>
|
||||||
<span class="font-medium text-white">${{ tooltipData.cache_creation_cost.toFixed(6) }}</span>
|
<span class="font-medium text-white">${{ tooltipData.cache_creation_cost.toFixed(6) }}</span>
|
||||||
@@ -246,6 +254,10 @@
|
|||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
<!-- Rate and Summary -->
|
<!-- Rate and Summary -->
|
||||||
|
<div class="flex items-center justify-between gap-6">
|
||||||
|
<span class="text-gray-400">{{ t('usage.serviceTier') }}</span>
|
||||||
|
<span class="font-semibold text-cyan-300">{{ getUsageServiceTierLabel(tooltipData?.service_tier, t) }}</span>
|
||||||
|
</div>
|
||||||
<div class="flex items-center justify-between gap-6">
|
<div class="flex items-center justify-between gap-6">
|
||||||
<span class="text-gray-400">{{ t('usage.rate') }}</span>
|
<span class="text-gray-400">{{ t('usage.rate') }}</span>
|
||||||
<span class="font-semibold text-blue-400">{{ (tooltipData?.rate_multiplier || 1).toFixed(2) }}x</span>
|
<span class="font-semibold text-blue-400">{{ (tooltipData?.rate_multiplier || 1).toFixed(2) }}x</span>
|
||||||
@@ -279,6 +291,8 @@
|
|||||||
import { ref } from 'vue'
|
import { ref } from 'vue'
|
||||||
import { useI18n } from 'vue-i18n'
|
import { useI18n } from 'vue-i18n'
|
||||||
import { formatDateTime, formatReasoningEffort } from '@/utils/format'
|
import { formatDateTime, formatReasoningEffort } from '@/utils/format'
|
||||||
|
import { formatTokenPricePerMillion } from '@/utils/usagePricing'
|
||||||
|
import { getUsageServiceTierLabel } from '@/utils/usageServiceTier'
|
||||||
import { resolveUsageRequestType } from '@/utils/usageRequestType'
|
import { resolveUsageRequestType } from '@/utils/usageRequestType'
|
||||||
import DataTable from '@/components/common/DataTable.vue'
|
import DataTable from '@/components/common/DataTable.vue'
|
||||||
import EmptyState from '@/components/common/EmptyState.vue'
|
import EmptyState from '@/components/common/EmptyState.vue'
|
||||||
|
|||||||
111
frontend/src/components/admin/usage/__tests__/UsageTable.spec.ts
Normal file
111
frontend/src/components/admin/usage/__tests__/UsageTable.spec.ts
Normal file
@@ -0,0 +1,111 @@
|
|||||||
|
import { describe, expect, it, vi, beforeEach } from 'vitest'
|
||||||
|
import { mount } from '@vue/test-utils'
|
||||||
|
import { nextTick } from 'vue'
|
||||||
|
|
||||||
|
import UsageTable from '../UsageTable.vue'
|
||||||
|
|
||||||
|
const messages: Record<string, string> = {
|
||||||
|
'usage.costDetails': 'Cost Breakdown',
|
||||||
|
'admin.usage.inputCost': 'Input Cost',
|
||||||
|
'admin.usage.outputCost': 'Output Cost',
|
||||||
|
'admin.usage.cacheCreationCost': 'Cache Creation Cost',
|
||||||
|
'admin.usage.cacheReadCost': 'Cache Read Cost',
|
||||||
|
'usage.inputTokenPrice': 'Input price',
|
||||||
|
'usage.outputTokenPrice': 'Output price',
|
||||||
|
'usage.perMillionTokens': '/ 1M tokens',
|
||||||
|
'usage.serviceTier': 'Service tier',
|
||||||
|
'usage.serviceTierPriority': 'Fast',
|
||||||
|
'usage.serviceTierFlex': 'Flex',
|
||||||
|
'usage.serviceTierStandard': 'Standard',
|
||||||
|
'usage.rate': 'Rate',
|
||||||
|
'usage.accountMultiplier': 'Account rate',
|
||||||
|
'usage.original': 'Original',
|
||||||
|
'usage.userBilled': 'User billed',
|
||||||
|
'usage.accountBilled': 'Account billed',
|
||||||
|
}
|
||||||
|
|
||||||
|
vi.mock('vue-i18n', async () => {
|
||||||
|
const actual = await vi.importActual<typeof import('vue-i18n')>('vue-i18n')
|
||||||
|
return {
|
||||||
|
...actual,
|
||||||
|
useI18n: () => ({
|
||||||
|
t: (key: string) => messages[key] ?? key,
|
||||||
|
}),
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
|
const DataTableStub = {
|
||||||
|
props: ['data'],
|
||||||
|
template: `
|
||||||
|
<div>
|
||||||
|
<div v-for="row in data" :key="row.request_id">
|
||||||
|
<slot name="cell-cost" :row="row" />
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
`,
|
||||||
|
}
|
||||||
|
|
||||||
|
describe('admin UsageTable tooltip', () => {
|
||||||
|
beforeEach(() => {
|
||||||
|
vi.spyOn(HTMLElement.prototype, 'getBoundingClientRect').mockReturnValue({
|
||||||
|
x: 0,
|
||||||
|
y: 0,
|
||||||
|
top: 20,
|
||||||
|
left: 20,
|
||||||
|
right: 120,
|
||||||
|
bottom: 40,
|
||||||
|
width: 100,
|
||||||
|
height: 20,
|
||||||
|
toJSON: () => ({}),
|
||||||
|
} as DOMRect)
|
||||||
|
})
|
||||||
|
|
||||||
|
it('shows service tier and billing breakdown in cost tooltip', async () => {
|
||||||
|
const row = {
|
||||||
|
request_id: 'req-admin-1',
|
||||||
|
actual_cost: 0.092883,
|
||||||
|
total_cost: 0.092883,
|
||||||
|
account_rate_multiplier: 1,
|
||||||
|
rate_multiplier: 1,
|
||||||
|
service_tier: 'priority',
|
||||||
|
input_cost: 0.020285,
|
||||||
|
output_cost: 0.00303,
|
||||||
|
cache_creation_cost: 0,
|
||||||
|
cache_read_cost: 0.069568,
|
||||||
|
input_tokens: 4057,
|
||||||
|
output_tokens: 101,
|
||||||
|
}
|
||||||
|
|
||||||
|
const wrapper = mount(UsageTable, {
|
||||||
|
props: {
|
||||||
|
data: [row],
|
||||||
|
loading: false,
|
||||||
|
columns: [],
|
||||||
|
},
|
||||||
|
global: {
|
||||||
|
stubs: {
|
||||||
|
DataTable: DataTableStub,
|
||||||
|
EmptyState: true,
|
||||||
|
Icon: true,
|
||||||
|
Teleport: true,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
})
|
||||||
|
|
||||||
|
await wrapper.find('.group.relative').trigger('mouseenter')
|
||||||
|
await nextTick()
|
||||||
|
|
||||||
|
const text = wrapper.text()
|
||||||
|
expect(text).toContain('Service tier')
|
||||||
|
expect(text).toContain('Fast')
|
||||||
|
expect(text).toContain('Rate')
|
||||||
|
expect(text).toContain('1.00x')
|
||||||
|
expect(text).toContain('Account rate')
|
||||||
|
expect(text).toContain('User billed')
|
||||||
|
expect(text).toContain('Account billed')
|
||||||
|
expect(text).toContain('$0.092883')
|
||||||
|
expect(text).toContain('$5.0000 / 1M tokens')
|
||||||
|
expect(text).toContain('$30.0000 / 1M tokens')
|
||||||
|
expect(text).toContain('$0.069568')
|
||||||
|
})
|
||||||
|
})
|
||||||
20
frontend/src/i18n/__tests__/usageServiceTierLocales.spec.ts
Normal file
20
frontend/src/i18n/__tests__/usageServiceTierLocales.spec.ts
Normal file
@@ -0,0 +1,20 @@
|
|||||||
|
import { describe, expect, it } from 'vitest'
|
||||||
|
|
||||||
|
import en from '../locales/en'
|
||||||
|
import zh from '../locales/zh'
|
||||||
|
|
||||||
|
describe('usage service tier locale keys', () => {
|
||||||
|
it('contains zh labels for service tier tooltip', () => {
|
||||||
|
expect(zh.usage.serviceTier).toBe('服务档位')
|
||||||
|
expect(zh.usage.serviceTierPriority).toBe('Fast')
|
||||||
|
expect(zh.usage.serviceTierFlex).toBe('Flex')
|
||||||
|
expect(zh.usage.serviceTierStandard).toBe('Standard')
|
||||||
|
})
|
||||||
|
|
||||||
|
it('contains en labels for service tier tooltip', () => {
|
||||||
|
expect(en.usage.serviceTier).toBe('Service tier')
|
||||||
|
expect(en.usage.serviceTierPriority).toBe('Fast')
|
||||||
|
expect(en.usage.serviceTierFlex).toBe('Flex')
|
||||||
|
expect(en.usage.serviceTierStandard).toBe('Standard')
|
||||||
|
})
|
||||||
|
})
|
||||||
@@ -726,8 +726,15 @@ export default {
|
|||||||
unknown: 'Unknown',
|
unknown: 'Unknown',
|
||||||
in: 'In',
|
in: 'In',
|
||||||
out: 'Out',
|
out: 'Out',
|
||||||
|
inputTokenPrice: 'Input price',
|
||||||
|
outputTokenPrice: 'Output price',
|
||||||
|
perMillionTokens: '/ 1M tokens',
|
||||||
cacheRead: 'Read',
|
cacheRead: 'Read',
|
||||||
cacheWrite: 'Write',
|
cacheWrite: 'Write',
|
||||||
|
serviceTier: 'Service tier',
|
||||||
|
serviceTierPriority: 'Fast',
|
||||||
|
serviceTierFlex: 'Flex',
|
||||||
|
serviceTierStandard: 'Standard',
|
||||||
rate: 'Rate',
|
rate: 'Rate',
|
||||||
original: 'Original',
|
original: 'Original',
|
||||||
billed: 'Billed',
|
billed: 'Billed',
|
||||||
|
|||||||
@@ -731,8 +731,15 @@ export default {
|
|||||||
unknown: '未知',
|
unknown: '未知',
|
||||||
in: '输入',
|
in: '输入',
|
||||||
out: '输出',
|
out: '输出',
|
||||||
|
inputTokenPrice: '输入单价',
|
||||||
|
outputTokenPrice: '输出单价',
|
||||||
|
perMillionTokens: '/ 1M Token',
|
||||||
cacheRead: '读取',
|
cacheRead: '读取',
|
||||||
cacheWrite: '写入',
|
cacheWrite: '写入',
|
||||||
|
serviceTier: '服务档位',
|
||||||
|
serviceTierPriority: 'Fast',
|
||||||
|
serviceTierFlex: 'Flex',
|
||||||
|
serviceTierStandard: 'Standard',
|
||||||
rate: '倍率',
|
rate: '倍率',
|
||||||
original: '原始',
|
original: '原始',
|
||||||
billed: '计费',
|
billed: '计费',
|
||||||
|
|||||||
@@ -934,6 +934,7 @@ export interface UsageLog {
|
|||||||
account_id: number | null
|
account_id: number | null
|
||||||
request_id: string
|
request_id: string
|
||||||
model: string
|
model: string
|
||||||
|
service_tier?: string | null
|
||||||
reasoning_effort?: string | null
|
reasoning_effort?: string | null
|
||||||
|
|
||||||
group_id: number | null
|
group_id: number | null
|
||||||
|
|||||||
39
frontend/src/utils/__tests__/usageServiceTier.spec.ts
Normal file
39
frontend/src/utils/__tests__/usageServiceTier.spec.ts
Normal file
@@ -0,0 +1,39 @@
|
|||||||
|
import { describe, expect, it } from 'vitest'
|
||||||
|
|
||||||
|
import { formatUsageServiceTier, getUsageServiceTierLabel, normalizeUsageServiceTier } from '@/utils/usageServiceTier'
|
||||||
|
|
||||||
|
describe('usageServiceTier utils', () => {
|
||||||
|
it('normalizes fast/default aliases', () => {
|
||||||
|
expect(normalizeUsageServiceTier('fast')).toBe('priority')
|
||||||
|
expect(normalizeUsageServiceTier(' default ')).toBe('standard')
|
||||||
|
expect(normalizeUsageServiceTier('STANDARD')).toBe('standard')
|
||||||
|
})
|
||||||
|
|
||||||
|
it('preserves supported tiers', () => {
|
||||||
|
expect(normalizeUsageServiceTier('priority')).toBe('priority')
|
||||||
|
expect(normalizeUsageServiceTier('flex')).toBe('flex')
|
||||||
|
})
|
||||||
|
|
||||||
|
it('formats empty values as standard', () => {
|
||||||
|
expect(formatUsageServiceTier()).toBe('standard')
|
||||||
|
expect(formatUsageServiceTier('')).toBe('standard')
|
||||||
|
})
|
||||||
|
|
||||||
|
it('passes through unknown non-empty tiers for display fallback', () => {
|
||||||
|
expect(normalizeUsageServiceTier('custom-tier')).toBe('custom-tier')
|
||||||
|
expect(formatUsageServiceTier('custom-tier')).toBe('custom-tier')
|
||||||
|
})
|
||||||
|
|
||||||
|
it('maps tiers to translated labels', () => {
|
||||||
|
const translate = (key: string) => ({
|
||||||
|
'usage.serviceTierPriority': 'Fast',
|
||||||
|
'usage.serviceTierFlex': 'Flex',
|
||||||
|
'usage.serviceTierStandard': 'Standard',
|
||||||
|
})[key] ?? key
|
||||||
|
|
||||||
|
expect(getUsageServiceTierLabel('fast', translate)).toBe('Fast')
|
||||||
|
expect(getUsageServiceTierLabel('flex', translate)).toBe('Flex')
|
||||||
|
expect(getUsageServiceTierLabel(undefined, translate)).toBe('Standard')
|
||||||
|
expect(getUsageServiceTierLabel('custom-tier', translate)).toBe('custom-tier')
|
||||||
|
})
|
||||||
|
})
|
||||||
49
frontend/src/utils/usagePricing.ts
Normal file
49
frontend/src/utils/usagePricing.ts
Normal file
@@ -0,0 +1,49 @@
|
|||||||
|
export const TOKENS_PER_MILLION = 1_000_000
|
||||||
|
|
||||||
|
interface TokenPriceFormatOptions {
|
||||||
|
fractionDigits?: number
|
||||||
|
withCurrencySymbol?: boolean
|
||||||
|
emptyValue?: string
|
||||||
|
}
|
||||||
|
|
||||||
|
function isFiniteNumber(value: unknown): value is number {
|
||||||
|
return typeof value === 'number' && Number.isFinite(value)
|
||||||
|
}
|
||||||
|
|
||||||
|
export function calculateTokenUnitPrice(
|
||||||
|
cost: number | null | undefined,
|
||||||
|
tokens: number | null | undefined
|
||||||
|
): number | null {
|
||||||
|
if (!isFiniteNumber(cost) || !isFiniteNumber(tokens) || tokens <= 0) {
|
||||||
|
return null
|
||||||
|
}
|
||||||
|
|
||||||
|
return cost / tokens
|
||||||
|
}
|
||||||
|
|
||||||
|
export function calculateTokenPricePerMillion(
|
||||||
|
cost: number | null | undefined,
|
||||||
|
tokens: number | null | undefined
|
||||||
|
): number | null {
|
||||||
|
const unitPrice = calculateTokenUnitPrice(cost, tokens)
|
||||||
|
if (unitPrice == null) {
|
||||||
|
return null
|
||||||
|
}
|
||||||
|
|
||||||
|
return unitPrice * TOKENS_PER_MILLION
|
||||||
|
}
|
||||||
|
|
||||||
|
export function formatTokenPricePerMillion(
|
||||||
|
cost: number | null | undefined,
|
||||||
|
tokens: number | null | undefined,
|
||||||
|
options: TokenPriceFormatOptions = {}
|
||||||
|
): string {
|
||||||
|
const pricePerMillion = calculateTokenPricePerMillion(cost, tokens)
|
||||||
|
if (pricePerMillion == null) {
|
||||||
|
return options.emptyValue ?? '-'
|
||||||
|
}
|
||||||
|
|
||||||
|
const fractionDigits = options.fractionDigits ?? 4
|
||||||
|
const formatted = pricePerMillion.toFixed(fractionDigits)
|
||||||
|
return options.withCurrencySymbol == false ? formatted : `$${formatted}`
|
||||||
|
}
|
||||||
25
frontend/src/utils/usageServiceTier.ts
Normal file
25
frontend/src/utils/usageServiceTier.ts
Normal file
@@ -0,0 +1,25 @@
|
|||||||
|
export function normalizeUsageServiceTier(serviceTier?: string | null): string | null {
|
||||||
|
const value = serviceTier?.trim().toLowerCase()
|
||||||
|
if (!value) return null
|
||||||
|
if (value === 'fast') return 'priority'
|
||||||
|
if (value === 'default' || value === 'standard') return 'standard'
|
||||||
|
if (value === 'priority' || value === 'flex') return value
|
||||||
|
return value
|
||||||
|
}
|
||||||
|
|
||||||
|
export function formatUsageServiceTier(serviceTier?: string | null): string {
|
||||||
|
const normalized = normalizeUsageServiceTier(serviceTier)
|
||||||
|
if (!normalized) return 'standard'
|
||||||
|
return normalized
|
||||||
|
}
|
||||||
|
|
||||||
|
export function getUsageServiceTierLabel(
|
||||||
|
serviceTier: string | null | undefined,
|
||||||
|
translate: (key: string) => string,
|
||||||
|
): string {
|
||||||
|
const tier = formatUsageServiceTier(serviceTier)
|
||||||
|
if (tier === 'priority') return translate('usage.serviceTierPriority')
|
||||||
|
if (tier === 'flex') return translate('usage.serviceTierFlex')
|
||||||
|
if (tier === 'standard') return translate('usage.serviceTierStandard')
|
||||||
|
return tier
|
||||||
|
}
|
||||||
@@ -426,6 +426,14 @@
|
|||||||
<span class="text-gray-400">{{ t('admin.usage.outputCost') }}</span>
|
<span class="text-gray-400">{{ t('admin.usage.outputCost') }}</span>
|
||||||
<span class="font-medium text-white">${{ tooltipData.output_cost.toFixed(6) }}</span>
|
<span class="font-medium text-white">${{ tooltipData.output_cost.toFixed(6) }}</span>
|
||||||
</div>
|
</div>
|
||||||
|
<div v-if="tooltipData && tooltipData.input_tokens > 0" class="flex items-center justify-between gap-4">
|
||||||
|
<span class="text-gray-400">{{ t('usage.inputTokenPrice') }}</span>
|
||||||
|
<span class="font-medium text-sky-300">{{ formatTokenPricePerMillion(tooltipData.input_cost, tooltipData.input_tokens) }} {{ t('usage.perMillionTokens') }}</span>
|
||||||
|
</div>
|
||||||
|
<div v-if="tooltipData && tooltipData.output_tokens > 0" class="flex items-center justify-between gap-4">
|
||||||
|
<span class="text-gray-400">{{ t('usage.outputTokenPrice') }}</span>
|
||||||
|
<span class="font-medium text-violet-300">{{ formatTokenPricePerMillion(tooltipData.output_cost, tooltipData.output_tokens) }} {{ t('usage.perMillionTokens') }}</span>
|
||||||
|
</div>
|
||||||
<div v-if="tooltipData && tooltipData.cache_creation_cost > 0" class="flex items-center justify-between gap-4">
|
<div v-if="tooltipData && tooltipData.cache_creation_cost > 0" class="flex items-center justify-between gap-4">
|
||||||
<span class="text-gray-400">{{ t('admin.usage.cacheCreationCost') }}</span>
|
<span class="text-gray-400">{{ t('admin.usage.cacheCreationCost') }}</span>
|
||||||
<span class="font-medium text-white">${{ tooltipData.cache_creation_cost.toFixed(6) }}</span>
|
<span class="font-medium text-white">${{ tooltipData.cache_creation_cost.toFixed(6) }}</span>
|
||||||
@@ -436,6 +444,10 @@
|
|||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
<!-- Rate and Summary -->
|
<!-- Rate and Summary -->
|
||||||
|
<div class="flex items-center justify-between gap-6">
|
||||||
|
<span class="text-gray-400">{{ t('usage.serviceTier') }}</span>
|
||||||
|
<span class="font-semibold text-cyan-300">{{ getUsageServiceTierLabel(tooltipData?.service_tier, t) }}</span>
|
||||||
|
</div>
|
||||||
<div class="flex items-center justify-between gap-6">
|
<div class="flex items-center justify-between gap-6">
|
||||||
<span class="text-gray-400">{{ t('usage.rate') }}</span>
|
<span class="text-gray-400">{{ t('usage.rate') }}</span>
|
||||||
<span class="font-semibold text-blue-400"
|
<span class="font-semibold text-blue-400"
|
||||||
@@ -478,6 +490,8 @@ import Icon from '@/components/icons/Icon.vue'
|
|||||||
import type { UsageLog, ApiKey, UsageQueryParams, UsageStatsResponse } from '@/types'
|
import type { UsageLog, ApiKey, UsageQueryParams, UsageStatsResponse } from '@/types'
|
||||||
import type { Column } from '@/components/common/types'
|
import type { Column } from '@/components/common/types'
|
||||||
import { formatDateTime, formatReasoningEffort } from '@/utils/format'
|
import { formatDateTime, formatReasoningEffort } from '@/utils/format'
|
||||||
|
import { formatTokenPricePerMillion } from '@/utils/usagePricing'
|
||||||
|
import { getUsageServiceTierLabel } from '@/utils/usageServiceTier'
|
||||||
import { resolveUsageRequestType } from '@/utils/usageRequestType'
|
import { resolveUsageRequestType } from '@/utils/usageRequestType'
|
||||||
|
|
||||||
const { t } = useI18n()
|
const { t } = useI18n()
|
||||||
|
|||||||
266
frontend/src/views/user/__tests__/UsageView.spec.ts
Normal file
266
frontend/src/views/user/__tests__/UsageView.spec.ts
Normal file
@@ -0,0 +1,266 @@
|
|||||||
|
import { describe, expect, it, vi, beforeEach } from 'vitest'
|
||||||
|
import { flushPromises, mount } from '@vue/test-utils'
|
||||||
|
import { nextTick } from 'vue'
|
||||||
|
|
||||||
|
import UsageView from '../UsageView.vue'
|
||||||
|
|
||||||
|
const { query, getStatsByDateRange, list, showError, showWarning, showSuccess, showInfo } = vi.hoisted(() => ({
|
||||||
|
query: vi.fn(),
|
||||||
|
getStatsByDateRange: vi.fn(),
|
||||||
|
list: vi.fn(),
|
||||||
|
showError: vi.fn(),
|
||||||
|
showWarning: vi.fn(),
|
||||||
|
showSuccess: vi.fn(),
|
||||||
|
showInfo: vi.fn(),
|
||||||
|
}))
|
||||||
|
|
||||||
|
const messages: Record<string, string> = {
|
||||||
|
'usage.costDetails': 'Cost Breakdown',
|
||||||
|
'admin.usage.inputCost': 'Input Cost',
|
||||||
|
'admin.usage.outputCost': 'Output Cost',
|
||||||
|
'admin.usage.cacheCreationCost': 'Cache Creation Cost',
|
||||||
|
'admin.usage.cacheReadCost': 'Cache Read Cost',
|
||||||
|
'usage.inputTokenPrice': 'Input price',
|
||||||
|
'usage.outputTokenPrice': 'Output price',
|
||||||
|
'usage.perMillionTokens': '/ 1M tokens',
|
||||||
|
'usage.serviceTier': 'Service tier',
|
||||||
|
'usage.serviceTierPriority': 'Fast',
|
||||||
|
'usage.serviceTierFlex': 'Flex',
|
||||||
|
'usage.serviceTierStandard': 'Standard',
|
||||||
|
'usage.rate': 'Rate',
|
||||||
|
'usage.original': 'Original',
|
||||||
|
'usage.billed': 'Billed',
|
||||||
|
'usage.allApiKeys': 'All API Keys',
|
||||||
|
'usage.apiKeyFilter': 'API Key',
|
||||||
|
'usage.model': 'Model',
|
||||||
|
'usage.reasoningEffort': 'Reasoning Effort',
|
||||||
|
'usage.type': 'Type',
|
||||||
|
'usage.tokens': 'Tokens',
|
||||||
|
'usage.cost': 'Cost',
|
||||||
|
'usage.firstToken': 'First Token',
|
||||||
|
'usage.duration': 'Duration',
|
||||||
|
'usage.time': 'Time',
|
||||||
|
'usage.userAgent': 'User Agent',
|
||||||
|
}
|
||||||
|
|
||||||
|
vi.mock('@/api', () => ({
|
||||||
|
usageAPI: {
|
||||||
|
query,
|
||||||
|
getStatsByDateRange,
|
||||||
|
},
|
||||||
|
keysAPI: {
|
||||||
|
list,
|
||||||
|
},
|
||||||
|
}))
|
||||||
|
|
||||||
|
vi.mock('@/stores/app', () => ({
|
||||||
|
useAppStore: () => ({ showError, showWarning, showSuccess, showInfo }),
|
||||||
|
}))
|
||||||
|
|
||||||
|
vi.mock('vue-i18n', async () => {
|
||||||
|
const actual = await vi.importActual<typeof import('vue-i18n')>('vue-i18n')
|
||||||
|
return {
|
||||||
|
...actual,
|
||||||
|
useI18n: () => ({
|
||||||
|
t: (key: string) => messages[key] ?? key,
|
||||||
|
}),
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
|
const AppLayoutStub = { template: '<div><slot /></div>' }
|
||||||
|
const TablePageLayoutStub = {
|
||||||
|
template: '<div><slot name="actions" /><slot name="filters" /><slot /></div>',
|
||||||
|
}
|
||||||
|
|
||||||
|
describe('user UsageView tooltip', () => {
|
||||||
|
beforeEach(() => {
|
||||||
|
query.mockReset()
|
||||||
|
getStatsByDateRange.mockReset()
|
||||||
|
list.mockReset()
|
||||||
|
showError.mockReset()
|
||||||
|
showWarning.mockReset()
|
||||||
|
showSuccess.mockReset()
|
||||||
|
showInfo.mockReset()
|
||||||
|
|
||||||
|
vi.spyOn(HTMLElement.prototype, 'getBoundingClientRect').mockReturnValue({
|
||||||
|
x: 0,
|
||||||
|
y: 0,
|
||||||
|
top: 20,
|
||||||
|
left: 20,
|
||||||
|
right: 120,
|
||||||
|
bottom: 40,
|
||||||
|
width: 100,
|
||||||
|
height: 20,
|
||||||
|
toJSON: () => ({}),
|
||||||
|
} as DOMRect)
|
||||||
|
|
||||||
|
;(globalThis as any).ResizeObserver = class {
|
||||||
|
observe() {}
|
||||||
|
disconnect() {}
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
|
it('shows fast service tier and unit prices in user tooltip', async () => {
|
||||||
|
query.mockResolvedValue({
|
||||||
|
items: [
|
||||||
|
{
|
||||||
|
request_id: 'req-user-1',
|
||||||
|
actual_cost: 0.092883,
|
||||||
|
total_cost: 0.092883,
|
||||||
|
rate_multiplier: 1,
|
||||||
|
service_tier: 'priority',
|
||||||
|
input_cost: 0.020285,
|
||||||
|
output_cost: 0.00303,
|
||||||
|
cache_creation_cost: 0,
|
||||||
|
cache_read_cost: 0.069568,
|
||||||
|
input_tokens: 4057,
|
||||||
|
output_tokens: 101,
|
||||||
|
cache_creation_tokens: 0,
|
||||||
|
cache_read_tokens: 278272,
|
||||||
|
cache_creation_5m_tokens: 0,
|
||||||
|
cache_creation_1h_tokens: 0,
|
||||||
|
image_count: 0,
|
||||||
|
image_size: null,
|
||||||
|
first_token_ms: null,
|
||||||
|
duration_ms: 1,
|
||||||
|
created_at: '2026-03-08T00:00:00Z',
|
||||||
|
},
|
||||||
|
],
|
||||||
|
total: 1,
|
||||||
|
pages: 1,
|
||||||
|
})
|
||||||
|
getStatsByDateRange.mockResolvedValue({
|
||||||
|
total_requests: 1,
|
||||||
|
total_tokens: 100,
|
||||||
|
total_cost: 0.1,
|
||||||
|
avg_duration_ms: 1,
|
||||||
|
})
|
||||||
|
list.mockResolvedValue({ items: [] })
|
||||||
|
|
||||||
|
const wrapper = mount(UsageView, {
|
||||||
|
global: {
|
||||||
|
stubs: {
|
||||||
|
AppLayout: AppLayoutStub,
|
||||||
|
TablePageLayout: TablePageLayoutStub,
|
||||||
|
Pagination: true,
|
||||||
|
EmptyState: true,
|
||||||
|
Select: true,
|
||||||
|
DateRangePicker: true,
|
||||||
|
Icon: true,
|
||||||
|
Teleport: true,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
})
|
||||||
|
|
||||||
|
await flushPromises()
|
||||||
|
await nextTick()
|
||||||
|
|
||||||
|
const setupState = (wrapper.vm as any).$?.setupState
|
||||||
|
setupState.tooltipData = {
|
||||||
|
request_id: 'req-user-1',
|
||||||
|
actual_cost: 0.092883,
|
||||||
|
total_cost: 0.092883,
|
||||||
|
rate_multiplier: 1,
|
||||||
|
service_tier: 'priority',
|
||||||
|
input_cost: 0.020285,
|
||||||
|
output_cost: 0.00303,
|
||||||
|
cache_creation_cost: 0,
|
||||||
|
cache_read_cost: 0.069568,
|
||||||
|
input_tokens: 4057,
|
||||||
|
output_tokens: 101,
|
||||||
|
}
|
||||||
|
setupState.tooltipVisible = true
|
||||||
|
await nextTick()
|
||||||
|
|
||||||
|
const text = wrapper.text()
|
||||||
|
expect(text).toContain('Service tier')
|
||||||
|
expect(text).toContain('Fast')
|
||||||
|
expect(text).toContain('Rate')
|
||||||
|
expect(text).toContain('1.00x')
|
||||||
|
expect(text).toContain('Billed')
|
||||||
|
expect(text).toContain('$0.092883')
|
||||||
|
expect(text).toContain('$5.0000 / 1M tokens')
|
||||||
|
expect(text).toContain('$30.0000 / 1M tokens')
|
||||||
|
})
|
||||||
|
|
||||||
|
it('exports csv with input and output unit price columns', async () => {
|
||||||
|
const exportedLogs = [
|
||||||
|
{
|
||||||
|
request_id: 'req-user-export',
|
||||||
|
actual_cost: 0.092883,
|
||||||
|
total_cost: 0.092883,
|
||||||
|
rate_multiplier: 1,
|
||||||
|
service_tier: 'priority',
|
||||||
|
input_cost: 0.020285,
|
||||||
|
output_cost: 0.00303,
|
||||||
|
cache_creation_cost: 0.000001,
|
||||||
|
cache_read_cost: 0.069568,
|
||||||
|
input_tokens: 4057,
|
||||||
|
output_tokens: 101,
|
||||||
|
cache_creation_tokens: 4,
|
||||||
|
cache_read_tokens: 278272,
|
||||||
|
cache_creation_5m_tokens: 0,
|
||||||
|
cache_creation_1h_tokens: 0,
|
||||||
|
image_count: 0,
|
||||||
|
image_size: null,
|
||||||
|
first_token_ms: 12,
|
||||||
|
duration_ms: 345,
|
||||||
|
created_at: '2026-03-08T00:00:00Z',
|
||||||
|
model: 'gpt-5.4',
|
||||||
|
reasoning_effort: null,
|
||||||
|
api_key: { name: 'demo-key' },
|
||||||
|
},
|
||||||
|
]
|
||||||
|
|
||||||
|
query.mockResolvedValue({
|
||||||
|
items: exportedLogs,
|
||||||
|
total: 1,
|
||||||
|
pages: 1,
|
||||||
|
})
|
||||||
|
getStatsByDateRange.mockResolvedValue({
|
||||||
|
total_requests: 1,
|
||||||
|
total_tokens: 100,
|
||||||
|
total_cost: 0.1,
|
||||||
|
avg_duration_ms: 1,
|
||||||
|
})
|
||||||
|
list.mockResolvedValue({ items: [] })
|
||||||
|
|
||||||
|
let exportedBlob: Blob | null = null
|
||||||
|
const originalCreateObjectURL = window.URL.createObjectURL
|
||||||
|
const originalRevokeObjectURL = window.URL.revokeObjectURL
|
||||||
|
window.URL.createObjectURL = vi.fn((blob: Blob | MediaSource) => {
|
||||||
|
exportedBlob = blob as Blob
|
||||||
|
return 'blob:usage-export'
|
||||||
|
}) as typeof window.URL.createObjectURL
|
||||||
|
window.URL.revokeObjectURL = vi.fn(() => {}) as typeof window.URL.revokeObjectURL
|
||||||
|
const clickSpy = vi.spyOn(HTMLAnchorElement.prototype, 'click').mockImplementation(() => {})
|
||||||
|
|
||||||
|
const wrapper = mount(UsageView, {
|
||||||
|
global: {
|
||||||
|
stubs: {
|
||||||
|
AppLayout: AppLayoutStub,
|
||||||
|
TablePageLayout: TablePageLayoutStub,
|
||||||
|
Pagination: true,
|
||||||
|
EmptyState: true,
|
||||||
|
Select: true,
|
||||||
|
DateRangePicker: true,
|
||||||
|
Icon: true,
|
||||||
|
Teleport: true,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
})
|
||||||
|
|
||||||
|
await flushPromises()
|
||||||
|
|
||||||
|
const setupState = (wrapper.vm as any).$?.setupState
|
||||||
|
await setupState.exportToCSV()
|
||||||
|
|
||||||
|
expect(exportedBlob).not.toBeNull()
|
||||||
|
expect(clickSpy).toHaveBeenCalled()
|
||||||
|
expect(showSuccess).toHaveBeenCalled()
|
||||||
|
|
||||||
|
window.URL.createObjectURL = originalCreateObjectURL
|
||||||
|
window.URL.revokeObjectURL = originalRevokeObjectURL
|
||||||
|
clickSpy.mockRestore()
|
||||||
|
})
|
||||||
|
})
|
||||||
Reference in New Issue
Block a user