Commit 571d1a4

feat: OpenAI Responses API service tiers (flex/priority) — UI selector, pricing, and tests (#7646)
Co-authored-by: Daniel Riccio <[email protected]>
1 parent 65146b1 commit 571d1a4

File tree

25 files changed: +531 -36 lines changed


packages/types/src/model.ts

Lines changed: 13 additions & 0 deletions
@@ -28,6 +28,13 @@ export const verbosityLevelsSchema = z.enum(verbosityLevels)
 
 export type VerbosityLevel = z.infer<typeof verbosityLevelsSchema>
 
+/**
+ * Service tiers (OpenAI Responses API)
+ */
+export const serviceTiers = ["default", "flex", "priority"] as const
+export const serviceTierSchema = z.enum(serviceTiers)
+export type ServiceTier = z.infer<typeof serviceTierSchema>
+
 /**
  * ModelParameter
  */
@@ -69,9 +76,15 @@ export const modelInfoSchema = z.object({
 	minTokensPerCachePoint: z.number().optional(),
 	maxCachePoints: z.number().optional(),
 	cachableFields: z.array(z.string()).optional(),
+	/**
+	 * Service tiers with pricing information.
+	 * Each tier can have a name (for OpenAI service tiers) and pricing overrides.
+	 * The top-level input/output/cache* fields represent the default/standard tier.
+	 */
 	tiers: z
 		.array(
 			z.object({
+				name: serviceTierSchema.optional(), // Service tier name (flex, priority, etc.)
 				contextWindow: z.number(),
 				inputPrice: z.number().optional(),
 				outputPrice: z.number().optional(),
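
For illustration, a minimal standalone sketch of how the new enum validates tier values (assumes zod v3 is available; everything outside the two schema lines is illustrative scaffolding, not code from this commit):

import { z } from "zod"

// Mirrors the serviceTiers/serviceTierSchema definitions added above.
const serviceTiers = ["default", "flex", "priority"] as const
const serviceTierSchema = z.enum(serviceTiers)
type ServiceTier = z.infer<typeof serviceTierSchema> // "default" | "flex" | "priority"

console.log(serviceTierSchema.safeParse("flex").success) // true
console.log(serviceTierSchema.safeParse("batch").success) // false: not a defined tier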

packages/types/src/provider-settings.ts

Lines changed: 4 additions & 1 deletion
@@ -1,6 +1,6 @@
 import { z } from "zod"
 
-import { modelInfoSchema, reasoningEffortWithMinimalSchema, verbosityLevelsSchema } from "./model.js"
+import { modelInfoSchema, reasoningEffortWithMinimalSchema, verbosityLevelsSchema, serviceTierSchema } from "./model.js"
 import { codebaseIndexProviderSchema } from "./codebase-index.js"
 import {
 	anthropicModels,
@@ -225,6 +225,9 @@ const geminiCliSchema = apiModelIdProviderModelSchema.extend({
 const openAiNativeSchema = apiModelIdProviderModelSchema.extend({
 	openAiNativeApiKey: z.string().optional(),
 	openAiNativeBaseUrl: z.string().optional(),
+	// OpenAI Responses API service tier for openai-native provider only.
+	// UI should only expose this when the selected model supports flex/priority.
+	openAiNativeServiceTier: serviceTierSchema.optional(),
 })
 
 const mistralSchema = apiModelIdProviderModelSchema.extend({
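
For illustration, a minimal sketch of the extended settings shape (standalone and hypothetical: the real schema extends apiModelIdProviderModelSchema, which is omitted here):

import { z } from "zod"

const serviceTierSchema = z.enum(["default", "flex", "priority"])
const openAiNativeSchema = z.object({
	openAiNativeApiKey: z.string().optional(),
	openAiNativeBaseUrl: z.string().optional(),
	openAiNativeServiceTier: serviceTierSchema.optional(),
})

// Because the field is optional, configs saved before this commit still parse.
console.log(openAiNativeSchema.safeParse({ openAiNativeApiKey: "sk-..." }).success) // true
console.log(openAiNativeSchema.safeParse({ openAiNativeServiceTier: "turbo" }).success) // false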

packages/types/src/providers/openai.ts

Lines changed: 32 additions & 0 deletions
@@ -32,6 +32,10 @@ export const openAiNativeModels = {
 		// supportsVerbosity is a new capability; ensure ModelInfo includes it
 		supportsVerbosity: true,
 		supportsTemperature: false,
+		tiers: [
+			{ name: "flex", contextWindow: 400000, inputPrice: 0.625, outputPrice: 5.0, cacheReadsPrice: 0.0625 },
+			{ name: "priority", contextWindow: 400000, inputPrice: 2.5, outputPrice: 20.0, cacheReadsPrice: 0.25 },
+		],
 	},
 	"gpt-5-mini-2025-08-07": {
 		maxTokens: 128000,
@@ -46,6 +50,10 @@ export const openAiNativeModels = {
 		description: "GPT-5 Mini: A faster, more cost-efficient version of GPT-5 for well-defined tasks",
 		supportsVerbosity: true,
 		supportsTemperature: false,
+		tiers: [
+			{ name: "flex", contextWindow: 400000, inputPrice: 0.125, outputPrice: 1.0, cacheReadsPrice: 0.0125 },
+			{ name: "priority", contextWindow: 400000, inputPrice: 0.45, outputPrice: 3.6, cacheReadsPrice: 0.045 },
+		],
 	},
 	"gpt-5-nano-2025-08-07": {
 		maxTokens: 128000,
@@ -60,6 +68,7 @@ export const openAiNativeModels = {
 		description: "GPT-5 Nano: Fastest, most cost-efficient version of GPT-5",
 		supportsVerbosity: true,
 		supportsTemperature: false,
+		tiers: [{ name: "flex", contextWindow: 400000, inputPrice: 0.025, outputPrice: 0.2, cacheReadsPrice: 0.0025 }],
 	},
 	"gpt-4.1": {
 		maxTokens: 32_768,
@@ -70,6 +79,9 @@ export const openAiNativeModels = {
 		outputPrice: 8,
 		cacheReadsPrice: 0.5,
 		supportsTemperature: true,
+		tiers: [
+			{ name: "priority", contextWindow: 1_047_576, inputPrice: 3.5, outputPrice: 14.0, cacheReadsPrice: 0.875 },
+		],
 	},
 	"gpt-4.1-mini": {
 		maxTokens: 32_768,
@@ -80,6 +92,9 @@ export const openAiNativeModels = {
 		outputPrice: 1.6,
 		cacheReadsPrice: 0.1,
 		supportsTemperature: true,
+		tiers: [
+			{ name: "priority", contextWindow: 1_047_576, inputPrice: 0.7, outputPrice: 2.8, cacheReadsPrice: 0.175 },
+		],
 	},
 	"gpt-4.1-nano": {
 		maxTokens: 32_768,
@@ -90,6 +105,9 @@ export const openAiNativeModels = {
 		outputPrice: 0.4,
 		cacheReadsPrice: 0.025,
 		supportsTemperature: true,
+		tiers: [
+			{ name: "priority", contextWindow: 1_047_576, inputPrice: 0.2, outputPrice: 0.8, cacheReadsPrice: 0.05 },
+		],
 	},
 	o3: {
 		maxTokens: 100_000,
@@ -102,6 +120,10 @@ export const openAiNativeModels = {
 		supportsReasoningEffort: true,
 		reasoningEffort: "medium",
 		supportsTemperature: false,
+		tiers: [
+			{ name: "flex", contextWindow: 200_000, inputPrice: 1.0, outputPrice: 4.0, cacheReadsPrice: 0.25 },
+			{ name: "priority", contextWindow: 200_000, inputPrice: 3.5, outputPrice: 14.0, cacheReadsPrice: 0.875 },
+		],
 	},
 	"o3-high": {
 		maxTokens: 100_000,
@@ -136,6 +158,10 @@ export const openAiNativeModels = {
 		supportsReasoningEffort: true,
 		reasoningEffort: "medium",
 		supportsTemperature: false,
+		tiers: [
+			{ name: "flex", contextWindow: 200_000, inputPrice: 0.55, outputPrice: 2.2, cacheReadsPrice: 0.138 },
+			{ name: "priority", contextWindow: 200_000, inputPrice: 2.0, outputPrice: 8.0, cacheReadsPrice: 0.5 },
+		],
 	},
 	"o4-mini-high": {
 		maxTokens: 100_000,
@@ -232,6 +258,9 @@ export const openAiNativeModels = {
 		outputPrice: 10,
 		cacheReadsPrice: 1.25,
 		supportsTemperature: true,
+		tiers: [
+			{ name: "priority", contextWindow: 128_000, inputPrice: 4.25, outputPrice: 17.0, cacheReadsPrice: 2.125 },
+		],
 	},
 	"gpt-4o-mini": {
 		maxTokens: 16_384,
@@ -242,6 +271,9 @@ export const openAiNativeModels = {
 		outputPrice: 0.6,
 		cacheReadsPrice: 0.075,
 		supportsTemperature: true,
+		tiers: [
+			{ name: "priority", contextWindow: 128_000, inputPrice: 0.25, outputPrice: 1.0, cacheReadsPrice: 0.125 },
+		],
 	},
 	"codex-mini-latest": {
 		maxTokens: 16_384,
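
For a sense of what these tier entries mean in practice, here is a small worked example (an assumption-laden sketch: it treats prices as USD per million tokens, which is how ModelInfo pricing is normally interpreted, and hard-codes the gpt-5 flex figures from above):

// gpt-5 flex tier pricing from the table above (USD per 1M tokens, assumed).
const flex = { inputPrice: 0.625, outputPrice: 5.0, cacheReadsPrice: 0.0625 }

function estimateFlexCost(inputTokens: number, outputTokens: number, cacheReadTokens: number): number {
	const uncachedInput = inputTokens - cacheReadTokens // cache hits are billed at the cheaper read rate
	return (
		(uncachedInput / 1_000_000) * flex.inputPrice +
		(cacheReadTokens / 1_000_000) * flex.cacheReadsPrice +
		(outputTokens / 1_000_000) * flex.outputPrice
	)
}

// 1M input tokens (half served from cache) plus 100k output tokens:
// 0.5 * 0.625 + 0.5 * 0.0625 + 0.1 * 5.0 = $0.84375
console.log(estimateFlexCost(1_000_000, 100_000, 500_000))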

src/api/providers/openai-native.ts

Lines changed: 61 additions & 1 deletion
@@ -11,6 +11,7 @@ import {
 	type ReasoningEffort,
 	type VerbosityLevel,
 	type ReasoningEffortWithMinimal,
+	type ServiceTier,
 } from "@roo-code/types"
 
 import type { ApiHandlerOptions } from "../../shared/api"
@@ -36,6 +37,8 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
 	private lastResponseId: string | undefined
 	private responseIdPromise: Promise<string | undefined> | undefined
 	private responseIdResolver: ((value: string | undefined) => void) | undefined
+	// Resolved service tier from Responses API (actual tier used by OpenAI)
+	private lastServiceTier: ServiceTier | undefined
 
 	// Event types handled by the shared event processor to avoid duplication
 	private readonly coreHandledEventTypes = new Set<string>([
@@ -90,10 +93,15 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
 		const cacheReadTokens =
 			usage.cache_read_input_tokens ?? usage.cache_read_tokens ?? usage.cached_tokens ?? cachedFromDetails ?? 0
 
+		// Resolve effective tier: prefer actual tier from response; otherwise requested tier
+		const effectiveTier =
+			this.lastServiceTier || (this.options.openAiNativeServiceTier as ServiceTier | undefined) || undefined
+		const effectiveInfo = this.applyServiceTierPricing(model.info, effectiveTier)
+
 		// Pass total input tokens directly to calculateApiCostOpenAI
 		// The function handles subtracting both cache reads and writes internally (see shared/cost.ts:46)
 		const totalCost = calculateApiCostOpenAI(
-			model.info,
+			effectiveInfo,
 			totalInputTokens,
 			totalOutputTokens,
 			cacheWriteTokens,
@@ -146,6 +154,9 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
 		messages: Anthropic.Messages.MessageParam[],
 		metadata?: ApiHandlerCreateMessageMetadata,
 	): ApiStream {
+		// Reset resolved tier for this request; will be set from response if present
+		this.lastServiceTier = undefined
+
 		// Use Responses API for ALL models
 		const { verbosity, reasoning } = this.getModel()
 
@@ -233,8 +244,13 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
 			previous_response_id?: string
 			store?: boolean
 			instructions?: string
+			service_tier?: ServiceTier
 		}
 
+		// Validate requested tier against model support; if not supported, omit.
+		const requestedTier = (this.options.openAiNativeServiceTier as ServiceTier | undefined) || undefined
+		const allowedTierNames = new Set(model.info.tiers?.map((t) => t.name).filter(Boolean) || [])
+
 		const body: Gpt5RequestBody = {
 			model: model.id,
 			input: formattedInput,
@@ -262,6 +278,11 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
 			// Use the per-request reserved output computed by Roo (params.maxTokens from getModelParams).
 			...(model.maxTokens ? { max_output_tokens: model.maxTokens } : {}),
 			...(requestPreviousResponseId && { previous_response_id: requestPreviousResponseId }),
+			// Include tier when selected and supported by the model, or when explicitly "default"
+			...(requestedTier &&
+				(requestedTier === "default" || allowedTierNames.has(requestedTier)) && {
+					service_tier: requestedTier,
+				}),
 		}
 
 		// Include text.verbosity only when the model explicitly supports it
@@ -636,6 +657,10 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
 			if (parsed.response?.id) {
 				this.resolveResponseId(parsed.response.id)
 			}
+			// Capture resolved service tier if present
+			if (parsed.response?.service_tier) {
+				this.lastServiceTier = parsed.response.service_tier as ServiceTier
+			}
 
 			// Delegate standard event types to the shared processor to avoid duplication
 			if (parsed?.type && this.coreHandledEventTypes.has(parsed.type)) {
@@ -927,6 +952,10 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
 			if (parsed.response?.id) {
 				this.resolveResponseId(parsed.response.id)
 			}
+			// Capture resolved service tier if present
+			if (parsed.response?.service_tier) {
+				this.lastServiceTier = parsed.response.service_tier as ServiceTier
+			}
 
 			// Check if the done event contains the complete output (as a fallback)
 			if (
@@ -1051,6 +1080,10 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
 		if (event?.response?.id) {
 			this.resolveResponseId(event.response.id)
 		}
+		// Capture resolved service tier when available
+		if (event?.response?.service_tier) {
+			this.lastServiceTier = event.response.service_tier as ServiceTier
+		}
 
 		// Handle known streaming text deltas
 		if (event?.type === "response.text.delta" || event?.type === "response.output_text.delta") {
@@ -1141,6 +1174,26 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
 		return info.reasoningEffort as ReasoningEffortWithMinimal | undefined
 	}
 
+	/**
+	 * Returns a shallow-cloned ModelInfo with pricing overridden for the given tier, if available.
+	 * If no tier or no overrides exist, the original ModelInfo is returned.
+	 */
+	private applyServiceTierPricing(info: ModelInfo, tier?: ServiceTier): ModelInfo {
+		if (!tier || tier === "default") return info
+
+		// Find the tier with matching name in the tiers array
+		const tierInfo = info.tiers?.find((t) => t.name === tier)
+		if (!tierInfo) return info
+
+		return {
+			...info,
+			inputPrice: tierInfo.inputPrice ?? info.inputPrice,
+			outputPrice: tierInfo.outputPrice ?? info.outputPrice,
+			cacheReadsPrice: tierInfo.cacheReadsPrice ?? info.cacheReadsPrice,
+			cacheWritesPrice: tierInfo.cacheWritesPrice ?? info.cacheWritesPrice,
+		}
+	}
+
 	// Removed isResponsesApiModel method as ALL models now use the Responses API
 
 	override getModel() {
@@ -1214,6 +1267,13 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
 			store: false, // Don't store prompt completions
 		}
 
+		// Include service tier if selected and supported
+		const requestedTier = (this.options.openAiNativeServiceTier as ServiceTier | undefined) || undefined
+		const allowedTierNames = new Set(model.info.tiers?.map((t) => t.name).filter(Boolean) || [])
+		if (requestedTier && (requestedTier === "default" || allowedTierNames.has(requestedTier))) {
+			requestBody.service_tier = requestedTier
+		}
+
 		// Add reasoning if supported
 		if (reasoningEffort) {
 			requestBody.reasoning = {
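
To summarize the tier lifecycle the handler implements, a simplified standalone sketch (names mirror the diff, but this is a re-statement for clarity, not the handler itself):

type ServiceTier = "default" | "flex" | "priority"

let lastServiceTier: ServiceTier | undefined

// 1. createMessage resets the resolved tier at the start of each request.
function onRequestStart(): void {
	lastServiceTier = undefined
}

// 2. Each SSE handler captures the tier OpenAI reports it actually used.
function onResponseEvent(event: { response?: { service_tier?: ServiceTier } }): void {
	if (event.response?.service_tier) {
		lastServiceTier = event.response.service_tier
	}
}

// 3. Cost calculation prefers the resolved tier over the requested one,
//    since OpenAI may fall back (e.g. flex capacity exhausted).
function effectiveTier(requested?: ServiceTier): ServiceTier | undefined {
	return lastServiceTier ?? requested
}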

webview-ui/src/components/settings/ApiOptions.tsx

Lines changed: 5 additions & 1 deletion
@@ -514,7 +514,11 @@ const ApiOptions = ({
 			)}
 
 			{selectedProvider === "openai-native" && (
-				<OpenAI apiConfiguration={apiConfiguration} setApiConfigurationField={setApiConfigurationField} />
+				<OpenAI
+					apiConfiguration={apiConfiguration}
+					setApiConfigurationField={setApiConfigurationField}
+					selectedModelInfo={selectedModelInfo}
+				/>
 			)}
 
 			{selectedProvider === "mistral" && (
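
Passing selectedModelInfo lets the OpenAI settings component restrict the tier dropdown to tiers the model actually defines. A hypothetical sketch of that gating logic (the component itself is part of this commit but not shown in this excerpt; the helper below is illustrative, not its real code):

interface TierEntry { name?: "default" | "flex" | "priority" }
interface ModelInfoLike { tiers?: TierEntry[] }

// "default" is always available (top-level pricing); named tiers come from the model.
function availableServiceTiers(info?: ModelInfoLike): string[] {
	const named = (info?.tiers ?? [])
		.map((t) => t.name)
		.filter((n): n is "default" | "flex" | "priority" => !!n)
	return ["default", ...named]
}

// A model with no named tiers yields just ["default"], so the UI can hide the selector.
console.log(availableServiceTiers({ tiers: [{ name: "flex" }] })) // ["default", "flex"]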
