Merged
13 changes: 13 additions & 0 deletions packages/types/src/model.ts
@@ -28,6 +28,13 @@ export const verbosityLevelsSchema = z.enum(verbosityLevels)

export type VerbosityLevel = z.infer<typeof verbosityLevelsSchema>

/**
* Service tiers (OpenAI Responses API)
*/
export const serviceTiers = ["default", "flex", "priority"] as const
export const serviceTierSchema = z.enum(serviceTiers)
export type ServiceTier = z.infer<typeof serviceTierSchema>

/**
* ModelParameter
*/
@@ -69,9 +76,15 @@ export const modelInfoSchema = z.object({
minTokensPerCachePoint: z.number().optional(),
maxCachePoints: z.number().optional(),
cachableFields: z.array(z.string()).optional(),
/**
* Service tiers with pricing information.
* Each tier can have a name (for OpenAI service tiers) and pricing overrides.
* The top-level input/output/cache* fields represent the default/standard tier.
*/
tiers: z
.array(
z.object({
name: serviceTierSchema.optional(), // Service tier name (flex, priority, etc.)
contextWindow: z.number(),
inputPrice: z.number().optional(),
outputPrice: z.number().optional(),
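For illustration, a tier entry accepted by this schema could look like the following sketch; the schema fragment mirrors the additions above, and the prices and context window are hypothetical:

import { z } from "zod"

// Mirrors the additions above: a closed set of OpenAI service tiers plus a
// per-tier pricing override entry.
const serviceTiers = ["default", "flex", "priority"] as const
const serviceTierSchema = z.enum(serviceTiers)

const tierSchema = z.object({
	name: serviceTierSchema.optional(), // omitted for unnamed context-window tiers
	contextWindow: z.number(),
	inputPrice: z.number().optional(),
	outputPrice: z.number().optional(),
	cacheReadsPrice: z.number().optional(),
})

// A hypothetical "flex" override: lower prices, same context window as the base model.
const flexTier = tierSchema.parse({
	name: "flex",
	contextWindow: 200_000,
	inputPrice: 1.0,
	outputPrice: 4.0,
	cacheReadsPrice: 0.25,
})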
5 changes: 4 additions & 1 deletion packages/types/src/provider-settings.ts
@@ -1,6 +1,6 @@
import { z } from "zod"

import { modelInfoSchema, reasoningEffortWithMinimalSchema, verbosityLevelsSchema } from "./model.js"
import { modelInfoSchema, reasoningEffortWithMinimalSchema, verbosityLevelsSchema, serviceTierSchema } from "./model.js"
import { codebaseIndexProviderSchema } from "./codebase-index.js"
import {
anthropicModels,
@@ -224,6 +224,9 @@ const geminiCliSchema = apiModelIdProviderModelSchema.extend({
const openAiNativeSchema = apiModelIdProviderModelSchema.extend({
openAiNativeApiKey: z.string().optional(),
openAiNativeBaseUrl: z.string().optional(),
// OpenAI Responses API service tier for openai-native provider only.
// UI should only expose this when the selected model supports flex/priority.
openAiNativeServiceTier: serviceTierSchema.optional(),
})

const mistralSchema = apiModelIdProviderModelSchema.extend({
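As a usage sketch, a provider configuration for the openai-native provider could then carry the tier selection alongside the existing keys (the apiModelId field name and the values are assumed for illustration):

// Illustrative settings fragment; openAiNativeServiceTier must be one of
// "default" | "flex" | "priority" per serviceTierSchema.
const settings = {
	apiModelId: "gpt-5-mini-2025-08-07", // assumed field name from apiModelIdProviderModelSchema
	openAiNativeApiKey: process.env.OPENAI_API_KEY,
	openAiNativeServiceTier: "flex" as const,
}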
32 changes: 32 additions & 0 deletions packages/types/src/providers/openai.ts
@@ -32,6 +32,10 @@ export const openAiNativeModels = {
// supportsVerbosity is a new capability; ensure ModelInfo includes it
supportsVerbosity: true,
supportsTemperature: false,
tiers: [
{ name: "flex", contextWindow: 400000, inputPrice: 0.625, outputPrice: 5.0, cacheReadsPrice: 0.0625 },
{ name: "priority", contextWindow: 400000, inputPrice: 2.5, outputPrice: 20.0, cacheReadsPrice: 0.25 },
],
},
"gpt-5-mini-2025-08-07": {
maxTokens: 128000,
@@ -46,6 +50,10 @@
description: "GPT-5 Mini: A faster, more cost-efficient version of GPT-5 for well-defined tasks",
supportsVerbosity: true,
supportsTemperature: false,
tiers: [
{ name: "flex", contextWindow: 400000, inputPrice: 0.125, outputPrice: 1.0, cacheReadsPrice: 0.0125 },
{ name: "priority", contextWindow: 400000, inputPrice: 0.45, outputPrice: 3.6, cacheReadsPrice: 0.045 },
],
},
"gpt-5-nano-2025-08-07": {
maxTokens: 128000,
@@ -60,6 +68,7 @@
description: "GPT-5 Nano: Fastest, most cost-efficient version of GPT-5",
supportsVerbosity: true,
supportsTemperature: false,
tiers: [{ name: "flex", contextWindow: 400000, inputPrice: 0.025, outputPrice: 0.2, cacheReadsPrice: 0.0025 }],
},
"gpt-4.1": {
maxTokens: 32_768,
@@ -70,6 +79,9 @@
outputPrice: 8,
cacheReadsPrice: 0.5,
supportsTemperature: true,
tiers: [
{ name: "priority", contextWindow: 1_047_576, inputPrice: 3.5, outputPrice: 14.0, cacheReadsPrice: 0.875 },
],
},
"gpt-4.1-mini": {
maxTokens: 32_768,
@@ -80,6 +92,9 @@
outputPrice: 1.6,
cacheReadsPrice: 0.1,
supportsTemperature: true,
tiers: [
{ name: "priority", contextWindow: 1_047_576, inputPrice: 0.7, outputPrice: 2.8, cacheReadsPrice: 0.175 },
],
},
"gpt-4.1-nano": {
maxTokens: 32_768,
@@ -90,6 +105,9 @@
outputPrice: 0.4,
cacheReadsPrice: 0.025,
supportsTemperature: true,
tiers: [
{ name: "priority", contextWindow: 1_047_576, inputPrice: 0.2, outputPrice: 0.8, cacheReadsPrice: 0.05 },
],
},
o3: {
maxTokens: 100_000,
@@ -102,6 +120,10 @@
supportsReasoningEffort: true,
reasoningEffort: "medium",
supportsTemperature: false,
tiers: [
{ name: "flex", contextWindow: 200_000, inputPrice: 1.0, outputPrice: 4.0, cacheReadsPrice: 0.25 },
{ name: "priority", contextWindow: 200_000, inputPrice: 3.5, outputPrice: 14.0, cacheReadsPrice: 0.875 },
],
},
"o3-high": {
maxTokens: 100_000,
@@ -136,6 +158,10 @@
supportsReasoningEffort: true,
reasoningEffort: "medium",
supportsTemperature: false,
tiers: [
{ name: "flex", contextWindow: 200_000, inputPrice: 0.55, outputPrice: 2.2, cacheReadsPrice: 0.138 },
{ name: "priority", contextWindow: 200_000, inputPrice: 2.0, outputPrice: 8.0, cacheReadsPrice: 0.5 },
],
},
"o4-mini-high": {
maxTokens: 100_000,
@@ -232,6 +258,9 @@
outputPrice: 10,
cacheReadsPrice: 1.25,
supportsTemperature: true,
tiers: [
{ name: "priority", contextWindow: 128_000, inputPrice: 4.25, outputPrice: 17.0, cacheReadsPrice: 2.125 },
],
},
"gpt-4o-mini": {
maxTokens: 16_384,
@@ -242,6 +271,9 @@
outputPrice: 0.6,
cacheReadsPrice: 0.075,
supportsTemperature: true,
tiers: [
{ name: "priority", contextWindow: 128_000, inputPrice: 0.25, outputPrice: 1.0, cacheReadsPrice: 0.125 },
],
},
"codex-mini-latest": {
maxTokens: 16_384,
62 changes: 61 additions & 1 deletion src/api/providers/openai-native.ts
@@ -11,6 +11,7 @@ import {
type ReasoningEffort,
type VerbosityLevel,
type ReasoningEffortWithMinimal,
type ServiceTier,
} from "@roo-code/types"

import type { ApiHandlerOptions } from "../../shared/api"
@@ -36,6 +37,8 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
private lastResponseId: string | undefined
private responseIdPromise: Promise<string | undefined> | undefined
private responseIdResolver: ((value: string | undefined) => void) | undefined
// Resolved service tier from Responses API (actual tier used by OpenAI)
private lastServiceTier: ServiceTier | undefined

// Event types handled by the shared event processor to avoid duplication
private readonly coreHandledEventTypes = new Set<string>([
@@ -90,10 +93,15 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
const cacheReadTokens =
usage.cache_read_input_tokens ?? usage.cache_read_tokens ?? usage.cached_tokens ?? cachedFromDetails ?? 0

// Resolve effective tier: prefer actual tier from response; otherwise requested tier
const effectiveTier =
this.lastServiceTier || (this.options.openAiNativeServiceTier as ServiceTier | undefined) || undefined
const effectiveInfo = this.applyServiceTierPricing(model.info, effectiveTier)

// Pass total input tokens directly to calculateApiCostOpenAI
// The function handles subtracting both cache reads and writes internally (see shared/cost.ts:46)
const totalCost = calculateApiCostOpenAI(
model.info,
effectiveInfo,
totalInputTokens,
totalOutputTokens,
cacheWriteTokens,
@@ -146,6 +154,9 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
messages: Anthropic.Messages.MessageParam[],
metadata?: ApiHandlerCreateMessageMetadata,
): ApiStream {
// Reset resolved tier for this request; will be set from response if present
this.lastServiceTier = undefined

// Use Responses API for ALL models
const { verbosity, reasoning } = this.getModel()

@@ -233,8 +244,13 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
previous_response_id?: string
store?: boolean
instructions?: string
service_tier?: ServiceTier
}

// Validate requested tier against model support; if not supported, omit.
const requestedTier = (this.options.openAiNativeServiceTier as ServiceTier | undefined) || undefined
const allowedTierNames = new Set(model.info.tiers?.map((t) => t.name).filter(Boolean) || [])
Review comment: Consider extracting the logic that builds the set of allowed tier names (allowedTierNames) into a shared helper function. This pattern is repeated (e.g. in buildRequestBody and completePrompt), and a helper would improve maintainability if the logic ever changes. (This comment was generated because it violated a code review rule: irule_tTqpIuNs8DV0QFGj.)
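A minimal sketch of the helper the review suggests (the name and placement are hypothetical; it mirrors the inline logic used here and in completePrompt):

import type { ModelInfo, ServiceTier } from "@roo-code/types"

// Return the requested tier only if the model declares it (or it is "default");
// otherwise return undefined so service_tier is omitted from the request.
function resolveRequestedServiceTier(info: ModelInfo, requested?: ServiceTier): ServiceTier | undefined {
	if (!requested) return undefined
	const allowed = new Set(info.tiers?.map((t) => t.name).filter(Boolean) || [])
	return requested === "default" || allowed.has(requested) ? requested : undefined
}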


const body: Gpt5RequestBody = {
model: model.id,
input: formattedInput,
@@ -262,6 +278,11 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
// Use the per-request reserved output computed by Roo (params.maxTokens from getModelParams).
...(model.maxTokens ? { max_output_tokens: model.maxTokens } : {}),
...(requestPreviousResponseId && { previous_response_id: requestPreviousResponseId }),
// Include tier when selected and supported by the model, or when explicitly "default"
...(requestedTier &&
(requestedTier === "default" || allowedTierNames.has(requestedTier)) && {
service_tier: requestedTier,
}),
}

// Include text.verbosity only when the model explicitly supports it
@@ -636,6 +657,10 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
if (parsed.response?.id) {
this.resolveResponseId(parsed.response.id)
}
// Capture resolved service tier if present
if (parsed.response?.service_tier) {
this.lastServiceTier = parsed.response.service_tier as ServiceTier
}

// Delegate standard event types to the shared processor to avoid duplication
if (parsed?.type && this.coreHandledEventTypes.has(parsed.type)) {
@@ -927,6 +952,10 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
if (parsed.response?.id) {
this.resolveResponseId(parsed.response.id)
}
// Capture resolved service tier if present
if (parsed.response?.service_tier) {
this.lastServiceTier = parsed.response.service_tier as ServiceTier
}

// Check if the done event contains the complete output (as a fallback)
if (
@@ -1051,6 +1080,10 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
if (event?.response?.id) {
this.resolveResponseId(event.response.id)
}
// Capture resolved service tier when available
if (event?.response?.service_tier) {
this.lastServiceTier = event.response.service_tier as ServiceTier
}

// Handle known streaming text deltas
if (event?.type === "response.text.delta" || event?.type === "response.output_text.delta") {
@@ -1141,6 +1174,26 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
return info.reasoningEffort as ReasoningEffortWithMinimal | undefined
}

/**
* Returns a shallow-cloned ModelInfo with pricing overridden for the given tier, if available.
* If no tier or no overrides exist, the original ModelInfo is returned.
*/
private applyServiceTierPricing(info: ModelInfo, tier?: ServiceTier): ModelInfo {
Review comment: Would it be helpful to add JSDoc comments explaining when service_tier is included vs omitted in requests? This would help future maintainers understand the streaming vs non-streaming behavior difference.

if (!tier || tier === "default") return info

// Find the tier with matching name in the tiers array
const tierInfo = info.tiers?.find((t) => t.name === tier)
if (!tierInfo) return info

return {
...info,
inputPrice: tierInfo.inputPrice ?? info.inputPrice,
outputPrice: tierInfo.outputPrice ?? info.outputPrice,
cacheReadsPrice: tierInfo.cacheReadsPrice ?? info.cacheReadsPrice,
cacheWritesPrice: tierInfo.cacheWritesPrice ?? info.cacheWritesPrice,
}
}
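To make the override concrete, here is a self-contained sketch (separate from the handler) that applies the same fallback rule to the gpt-4.1 numbers added in this PR:

// gpt-4.1 base pricing and its priority tier, copied from the diff above
// (USD per 1M tokens). Note the tier does not set cacheWritesPrice.
const gpt41 = {
	inputPrice: 2,
	outputPrice: 8,
	cacheReadsPrice: 0.5,
	tiers: [{ name: "priority" as const, contextWindow: 1_047_576, inputPrice: 3.5, outputPrice: 14.0, cacheReadsPrice: 0.875 }],
}

// Same rule as applyServiceTierPricing: take the tier's value when present,
// otherwise fall back to the base price.
const tier = gpt41.tiers.find((t) => t.name === "priority")
const effective = {
	inputPrice: tier?.inputPrice ?? gpt41.inputPrice, // 3.5
	outputPrice: tier?.outputPrice ?? gpt41.outputPrice, // 14
	cacheReadsPrice: tier?.cacheReadsPrice ?? gpt41.cacheReadsPrice, // 0.875
}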

// Removed isResponsesApiModel method as ALL models now use the Responses API

override getModel() {
Expand Down Expand Up @@ -1214,6 +1267,13 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio
store: false, // Don't store prompt completions
}

// Include service tier if selected and supported
const requestedTier = (this.options.openAiNativeServiceTier as ServiceTier | undefined) || undefined
const allowedTierNames = new Set(model.info.tiers?.map((t) => t.name).filter(Boolean) || [])
if (requestedTier && (requestedTier === "default" || allowedTierNames.has(requestedTier))) {
requestBody.service_tier = requestedTier
}

// Add reasoning if supported
if (reasoningEffort) {
requestBody.reasoning = {
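Putting the request side together, a Responses API body for a model that declares the requested tier carries service_tier, while an unsupported selection is silently dropped. A rough sketch (field values are illustrative; only fields shown in Gpt5RequestBody are assumed):

// Requested tier "flex" is allowed for gpt-5-mini, so it is sent as-is.
const exampleBody = {
	model: "gpt-5-mini-2025-08-07",
	input: "…formatted conversation…", // the handler builds structured input; a plain string stands in here
	max_output_tokens: 8192,
	service_tier: "flex",
}
// Had the user selected "priority" for gpt-5-nano (which only declares a "flex"
// tier above), service_tier would be omitted and OpenAI would bill the default
// tier. Either way, the tier echoed back on response.service_tier is captured
// and drives cost calculation through applyServiceTierPricing.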
6 changes: 5 additions & 1 deletion webview-ui/src/components/settings/ApiOptions.tsx
@@ -496,7 +496,11 @@ const ApiOptions = ({
)}

{selectedProvider === "openai-native" && (
<OpenAI apiConfiguration={apiConfiguration} setApiConfigurationField={setApiConfigurationField} />
<OpenAI
apiConfiguration={apiConfiguration}
setApiConfigurationField={setApiConfigurationField}
selectedModelInfo={selectedModelInfo}
/>
)}

{selectedProvider === "mistral" && (
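On the settings UI side, the provider-settings comment says the tier should only be exposed when the selected model supports flex or priority; a hypothetical gate inside the OpenAI component (whose internals are not part of this diff) could derive that from the newly passed selectedModelInfo:

import type { ModelInfo } from "@roo-code/types"

// Hypothetical gating helper, not part of this PR: render the service-tier
// picker only when the selected model declares a named flex/priority tier.
const modelSupportsServiceTiers = (info?: ModelInfo) =>
	(info?.tiers ?? []).some((t) => t.name === "flex" || t.name === "priority")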