@@ -11,6 +11,7 @@ import {
11
11
type ReasoningEffort ,
12
12
type VerbosityLevel ,
13
13
type ReasoningEffortWithMinimal ,
14
+ type ServiceTier ,
14
15
} from "@roo-code/types"
15
16
16
17
import type { ApiHandlerOptions } from "../../shared/api"
@@ -36,6 +37,8 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
36
37
private lastResponseId : string | undefined
37
38
private responseIdPromise : Promise < string | undefined > | undefined
38
39
private responseIdResolver : ( ( value : string | undefined ) => void ) | undefined
40
+ // Resolved service tier from Responses API (actual tier used by OpenAI)
41
+ private lastServiceTier : ServiceTier | undefined
39
42
40
43
// Event types handled by the shared event processor to avoid duplication
41
44
private readonly coreHandledEventTypes = new Set < string > ( [
@@ -90,10 +93,15 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
90
93
const cacheReadTokens =
91
94
usage . cache_read_input_tokens ?? usage . cache_read_tokens ?? usage . cached_tokens ?? cachedFromDetails ?? 0
92
95
96
+ // Resolve effective tier: prefer actual tier from response; otherwise requested tier
97
+ const effectiveTier =
98
+ this . lastServiceTier || ( this . options . openAiNativeServiceTier as ServiceTier | undefined ) || undefined
99
+ const effectiveInfo = this . applyServiceTierPricing ( model . info , effectiveTier )
100
+
93
101
// Pass total input tokens directly to calculateApiCostOpenAI
94
102
// The function handles subtracting both cache reads and writes internally (see shared/cost.ts:46)
95
103
const totalCost = calculateApiCostOpenAI (
96
- model . info ,
104
+ effectiveInfo ,
97
105
totalInputTokens ,
98
106
totalOutputTokens ,
99
107
cacheWriteTokens ,
@@ -146,6 +154,9 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
146
154
messages : Anthropic . Messages . MessageParam [ ] ,
147
155
metadata ?: ApiHandlerCreateMessageMetadata ,
148
156
) : ApiStream {
157
+ // Reset resolved tier for this request; will be set from response if present
158
+ this . lastServiceTier = undefined
159
+
149
160
// Use Responses API for ALL models
150
161
const { verbosity, reasoning } = this . getModel ( )
151
162
@@ -233,8 +244,13 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
233
244
previous_response_id ?: string
234
245
store ?: boolean
235
246
instructions ?: string
247
+ service_tier ?: ServiceTier
236
248
}
237
249
250
+ // Validate requested tier against model support; if not supported, omit.
251
+ const requestedTier = ( this . options . openAiNativeServiceTier as ServiceTier | undefined ) || undefined
252
+ const allowedTierNames = new Set ( model . info . tiers ?. map ( ( t ) => t . name ) . filter ( Boolean ) || [ ] )
253
+
238
254
const body : Gpt5RequestBody = {
239
255
model : model . id ,
240
256
input : formattedInput ,
@@ -262,6 +278,11 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
262
278
// Use the per-request reserved output computed by Roo (params.maxTokens from getModelParams).
263
279
...( model . maxTokens ? { max_output_tokens : model . maxTokens } : { } ) ,
264
280
...( requestPreviousResponseId && { previous_response_id : requestPreviousResponseId } ) ,
281
+ // Include tier when selected and supported by the model, or when explicitly "default"
282
+ ...( requestedTier &&
283
+ ( requestedTier === "default" || allowedTierNames . has ( requestedTier ) ) && {
284
+ service_tier : requestedTier ,
285
+ } ) ,
265
286
}
266
287
267
288
// Include text.verbosity only when the model explicitly supports it
@@ -636,6 +657,10 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
636
657
if ( parsed . response ?. id ) {
637
658
this . resolveResponseId ( parsed . response . id )
638
659
}
660
+ // Capture resolved service tier if present
661
+ if ( parsed . response ?. service_tier ) {
662
+ this . lastServiceTier = parsed . response . service_tier as ServiceTier
663
+ }
639
664
640
665
// Delegate standard event types to the shared processor to avoid duplication
641
666
if ( parsed ?. type && this . coreHandledEventTypes . has ( parsed . type ) ) {
@@ -927,6 +952,10 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
927
952
if ( parsed . response ?. id ) {
928
953
this . resolveResponseId ( parsed . response . id )
929
954
}
955
+ // Capture resolved service tier if present
956
+ if ( parsed . response ?. service_tier ) {
957
+ this . lastServiceTier = parsed . response . service_tier as ServiceTier
958
+ }
930
959
931
960
// Check if the done event contains the complete output (as a fallback)
932
961
if (
@@ -1051,6 +1080,10 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
1051
1080
if ( event ?. response ?. id ) {
1052
1081
this . resolveResponseId ( event . response . id )
1053
1082
}
1083
+ // Capture resolved service tier when available
1084
+ if ( event ?. response ?. service_tier ) {
1085
+ this . lastServiceTier = event . response . service_tier as ServiceTier
1086
+ }
1054
1087
1055
1088
// Handle known streaming text deltas
1056
1089
if ( event ?. type === "response.text.delta" || event ?. type === "response.output_text.delta" ) {
@@ -1141,6 +1174,26 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
1141
1174
return info . reasoningEffort as ReasoningEffortWithMinimal | undefined
1142
1175
}
1143
1176
1177
/**
 * Produces the pricing view of a model for a specific service tier.
 *
 * The original `info` object is returned untouched (same reference) when `tier` is
 * not provided, when it is "default", or when `info.tiers` has no entry whose `name`
 * equals `tier`. Otherwise a shallow copy of `info` is returned in which each of the
 * four price fields takes the tier's value, falling back to the base value when the
 * tier leaves that field null or undefined.
 *
 * @param info Base model information, optionally carrying per-tier price overrides in `tiers`.
 * @param tier Service tier whose pricing should be applied.
 * @returns Either `info` itself or a shallow clone with tier-specific prices.
 */
private applyServiceTierPricing(info: ModelInfo, tier?: ServiceTier): ModelInfo {
	// Only a concrete, non-default tier can carry price overrides.
	const override = tier && tier !== "default" ? info.tiers?.find((entry) => entry.name === tier) : undefined

	if (!override) {
		return info
	}

	const { inputPrice, outputPrice, cacheReadsPrice, cacheWritesPrice } = override

	// Per-field fallback: a tier may override only a subset of the prices.
	return {
		...info,
		inputPrice: inputPrice ?? info.inputPrice,
		outputPrice: outputPrice ?? info.outputPrice,
		cacheReadsPrice: cacheReadsPrice ?? info.cacheReadsPrice,
		cacheWritesPrice: cacheWritesPrice ?? info.cacheWritesPrice,
	}
}
1196
+
1144
1197
// Removed isResponsesApiModel method as ALL models now use the Responses API
1145
1198
1146
1199
override getModel ( ) {
@@ -1214,6 +1267,13 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
1214
1267
store : false , // Don't store prompt completions
1215
1268
}
1216
1269
1270
+ // Include service tier if selected and supported
1271
+ const requestedTier = ( this . options . openAiNativeServiceTier as ServiceTier | undefined ) || undefined
1272
+ const allowedTierNames = new Set ( model . info . tiers ?. map ( ( t ) => t . name ) . filter ( Boolean ) || [ ] )
1273
+ if ( requestedTier && ( requestedTier === "default" || allowedTierNames . has ( requestedTier ) ) ) {
1274
+ requestBody . service_tier = requestedTier
1275
+ }
1276
+
1217
1277
// Add reasoning if supported
1218
1278
if ( reasoningEffort ) {
1219
1279
requestBody . reasoning = {
0 commit comments