From db4ddb1b24fe32576d73ce65549021e1afc177d2 Mon Sep 17 00:00:00 2001 From: Hannes Rudolph Date: Wed, 3 Sep 2025 16:10:38 -0600 Subject: [PATCH 1/6] feat(ui): OpenAI Native service tier dropdown + tiered pricing table; hide redundant price rows --- .../src/components/settings/ApiOptions.tsx | 6 +- .../src/components/settings/ModelInfoView.tsx | 94 ++++++++++++++++--- .../components/settings/providers/OpenAI.tsx | 44 ++++++++- 3 files changed, 128 insertions(+), 16 deletions(-) diff --git a/webview-ui/src/components/settings/ApiOptions.tsx b/webview-ui/src/components/settings/ApiOptions.tsx index 80ecd75ae4e6..a147ca1b86d4 100644 --- a/webview-ui/src/components/settings/ApiOptions.tsx +++ b/webview-ui/src/components/settings/ApiOptions.tsx @@ -496,7 +496,11 @@ const ApiOptions = ({ )} {selectedProvider === "openai-native" && ( - + )} {selectedProvider === "mistral" && ( diff --git a/webview-ui/src/components/settings/ModelInfoView.tsx b/webview-ui/src/components/settings/ModelInfoView.tsx index 5091fb1a6835..7705bcf0a11a 100644 --- a/webview-ui/src/components/settings/ModelInfoView.tsx +++ b/webview-ui/src/components/settings/ModelInfoView.tsx @@ -25,7 +25,14 @@ export const ModelInfoView = ({ }: ModelInfoViewProps) => { const { t } = useAppTranslation() - const infoItems = [ + // Show tiered pricing table for OpenAI Native when model supports non-standard tiers + const allowedTiers = + (modelInfo?.allowedServiceTiers || []).filter((tier) => tier === "flex" || tier === "priority") ?? [] + const tierPricing = modelInfo?.serviceTierPricing + const shouldShowTierPricingTable = apiProvider === "openai-native" && allowedTiers.length > 0 && !!tierPricing + const fmt = (n?: number) => (typeof n === "number" ? 
`${formatPrice(n)}` : "—") + + const baseInfoItems = [ typeof modelInfo?.contextWindow === "number" && modelInfo.contextWindow > 0 && ( <> {t("settings:modelInfo.contextWindow")}{" "} @@ -53,6 +60,21 @@ export const ModelInfoView = ({ supportsLabel={t("settings:modelInfo.supportsPromptCache")} doesNotSupportLabel={t("settings:modelInfo.noPromptCache")} />, + apiProvider === "gemini" && ( + + {selectedModelId.includes("pro-preview") + ? t("settings:modelInfo.gemini.billingEstimate") + : t("settings:modelInfo.gemini.freeRequests", { + count: selectedModelId && selectedModelId.includes("flash") ? 15 : 2, + })}{" "} + + {t("settings:modelInfo.gemini.pricingDetails")} + + + ), + ].filter(Boolean) + + const priceInfoItems = [ modelInfo?.inputPrice !== undefined && modelInfo.inputPrice > 0 && ( <> {t("settings:modelInfo.inputPrice")}:{" "} @@ -77,20 +99,10 @@ export const ModelInfoView = ({ {formatPrice(modelInfo.cacheWritesPrice || 0)} / 1M tokens ), - apiProvider === "gemini" && ( - - {selectedModelId.includes("pro-preview") - ? t("settings:modelInfo.gemini.billingEstimate") - : t("settings:modelInfo.gemini.freeRequests", { - count: selectedModelId && selectedModelId.includes("flash") ? 15 : 2, - })}{" "} - - {t("settings:modelInfo.gemini.pricingDetails")} - - - ), ].filter(Boolean) + const infoItems = shouldShowTierPricingTable ? baseInfoItems : [...baseInfoItems, ...priceInfoItems] + return ( <> {modelInfo?.description && ( @@ -106,6 +118,62 @@ export const ModelInfoView = ({
{item}
))} + + {shouldShowTierPricingTable && ( +
+
+ Pricing by service tier (price per 1M tokens) +
+
+ + + + + + + + + + + + + + + + + {allowedTiers.includes("flex") && ( + + + + + + + )} + {allowedTiers.includes("priority") && ( + + + + + + + )} + +
TierInputOutputCache reads
Standard{fmt(modelInfo?.inputPrice)}{fmt(modelInfo?.outputPrice)}{fmt(modelInfo?.cacheReadsPrice)}
Flex + {fmt(tierPricing?.flex?.inputPrice ?? modelInfo?.inputPrice)} + + {fmt(tierPricing?.flex?.outputPrice ?? modelInfo?.outputPrice)} + + {fmt(tierPricing?.flex?.cacheReadsPrice ?? modelInfo?.cacheReadsPrice)} +
Priority + {fmt(tierPricing?.priority?.inputPrice ?? modelInfo?.inputPrice)} + + {fmt(tierPricing?.priority?.outputPrice ?? modelInfo?.outputPrice)} + + {fmt(tierPricing?.priority?.cacheReadsPrice ?? modelInfo?.cacheReadsPrice)} +
+
+
+ )} ) } diff --git a/webview-ui/src/components/settings/providers/OpenAI.tsx b/webview-ui/src/components/settings/providers/OpenAI.tsx index e2f7857fe095..666d786e1f2b 100644 --- a/webview-ui/src/components/settings/providers/OpenAI.tsx +++ b/webview-ui/src/components/settings/providers/OpenAI.tsx @@ -2,19 +2,21 @@ import { useCallback, useState } from "react" import { Checkbox } from "vscrui" import { VSCodeTextField } from "@vscode/webview-ui-toolkit/react" -import type { ProviderSettings } from "@roo-code/types" +import type { ModelInfo, ProviderSettings } from "@roo-code/types" import { useAppTranslation } from "@src/i18n/TranslationContext" import { VSCodeButtonLink } from "@src/components/common/VSCodeButtonLink" +import { Select, SelectContent, SelectItem, SelectTrigger, SelectValue, StandardTooltip } from "@src/components/ui" import { inputEventTransform } from "../transforms" type OpenAIProps = { apiConfiguration: ProviderSettings setApiConfigurationField: (field: keyof ProviderSettings, value: ProviderSettings[keyof ProviderSettings]) => void + selectedModelInfo?: ModelInfo } -export const OpenAI = ({ apiConfiguration, setApiConfigurationField }: OpenAIProps) => { +export const OpenAI = ({ apiConfiguration, setApiConfigurationField, selectedModelInfo }: OpenAIProps) => { const { t } = useAppTranslation() const [openAiNativeBaseUrlSelected, setOpenAiNativeBaseUrlSelected] = useState( @@ -72,6 +74,44 @@ export const OpenAI = ({ apiConfiguration, setApiConfigurationField }: OpenAIPro {t("settings:providers.getOpenAiApiKey")} )} + + {(() => { + const allowedTiers = (selectedModelInfo?.allowedServiceTiers || []).filter( + (t) => t === "flex" || t === "priority", + ) + if (allowedTiers.length === 0) return null + + return ( +
+
+ + + + +
+ + +
+ ) + })()} ) } From 5a39c3a0fe649890860f9aca97402de5c8c15751 Mon Sep 17 00:00:00 2001 From: Daniel Riccio Date: Thu, 4 Sep 2025 12:34:40 -0500 Subject: [PATCH 2/6] =?UTF-8?q?feat:=20OpenAI=20Responses=20API=20service?= =?UTF-8?q?=20tiers=20(flex/priority)=20=E2=80=94=20pricing=20metadata,=20?= =?UTF-8?q?handler=20support,=20and=20UI=20selection?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- packages/types/src/model.ts | 13 ++++++ packages/types/src/provider-settings.ts | 5 +- packages/types/src/providers/openai.ts | 44 ++++++++++++++++++ src/api/providers/openai-native.ts | 62 ++++++++++++++++++++++++- 4 files changed, 122 insertions(+), 2 deletions(-) diff --git a/packages/types/src/model.ts b/packages/types/src/model.ts index 6786e915839a..244812b29cc6 100644 --- a/packages/types/src/model.ts +++ b/packages/types/src/model.ts @@ -28,6 +28,13 @@ export const verbosityLevelsSchema = z.enum(verbosityLevels) export type VerbosityLevel = z.infer +/** + * Service tiers (OpenAI Responses API) + */ +export const serviceTiers = ["default", "flex", "priority"] as const +export const serviceTierSchema = z.enum(serviceTiers) +export type ServiceTier = z.infer + /** * ModelParameter */ @@ -69,9 +76,15 @@ export const modelInfoSchema = z.object({ minTokensPerCachePoint: z.number().optional(), maxCachePoints: z.number().optional(), cachableFields: z.array(z.string()).optional(), + /** + * Service tiers with pricing information. + * Each tier can have a name (for OpenAI service tiers) and pricing overrides. + * The top-level input/output/cache* fields represent the default/standard tier. + */ tiers: z .array( z.object({ + name: serviceTierSchema.optional(), // Service tier name (flex, priority, etc.) 
contextWindow: z.number(), inputPrice: z.number().optional(), outputPrice: z.number().optional(), diff --git a/packages/types/src/provider-settings.ts b/packages/types/src/provider-settings.ts index 090dfe66930f..ae0d6002e532 100644 --- a/packages/types/src/provider-settings.ts +++ b/packages/types/src/provider-settings.ts @@ -1,6 +1,6 @@ import { z } from "zod" -import { modelInfoSchema, reasoningEffortWithMinimalSchema, verbosityLevelsSchema } from "./model.js" +import { modelInfoSchema, reasoningEffortWithMinimalSchema, verbosityLevelsSchema, serviceTierSchema } from "./model.js" import { codebaseIndexProviderSchema } from "./codebase-index.js" import { anthropicModels, @@ -224,6 +224,9 @@ const geminiCliSchema = apiModelIdProviderModelSchema.extend({ const openAiNativeSchema = apiModelIdProviderModelSchema.extend({ openAiNativeApiKey: z.string().optional(), openAiNativeBaseUrl: z.string().optional(), + // OpenAI Responses API service tier for openai-native provider only. + // UI should only expose this when the selected model supports flex/priority. 
+ openAiNativeServiceTier: serviceTierSchema.optional(), }) const mistralSchema = apiModelIdProviderModelSchema.extend({ diff --git a/packages/types/src/providers/openai.ts b/packages/types/src/providers/openai.ts index 59e5c481efce..37d6dc800aed 100644 --- a/packages/types/src/providers/openai.ts +++ b/packages/types/src/providers/openai.ts @@ -32,6 +32,11 @@ export const openAiNativeModels = { // supportsVerbosity is a new capability; ensure ModelInfo includes it supportsVerbosity: true, supportsTemperature: false, + allowedServiceTiers: ["flex", "priority"], + serviceTierPricing: { + flex: { inputPrice: 0.625, outputPrice: 5.0, cacheReadsPrice: 0.0625 }, + priority: { inputPrice: 2.5, outputPrice: 20.0, cacheReadsPrice: 0.25 }, + }, }, "gpt-5-mini-2025-08-07": { maxTokens: 128000, @@ -46,6 +51,11 @@ export const openAiNativeModels = { description: "GPT-5 Mini: A faster, more cost-efficient version of GPT-5 for well-defined tasks", supportsVerbosity: true, supportsTemperature: false, + allowedServiceTiers: ["flex", "priority"], + serviceTierPricing: { + flex: { inputPrice: 0.125, outputPrice: 1.0, cacheReadsPrice: 0.0125 }, + priority: { inputPrice: 0.45, outputPrice: 3.6, cacheReadsPrice: 0.045 }, + }, }, "gpt-5-nano-2025-08-07": { maxTokens: 128000, @@ -60,6 +70,10 @@ export const openAiNativeModels = { description: "GPT-5 Nano: Fastest, most cost-efficient version of GPT-5", supportsVerbosity: true, supportsTemperature: false, + allowedServiceTiers: ["flex"], + serviceTierPricing: { + flex: { inputPrice: 0.025, outputPrice: 0.2, cacheReadsPrice: 0.0025 }, + }, }, "gpt-4.1": { maxTokens: 32_768, @@ -70,6 +84,10 @@ export const openAiNativeModels = { outputPrice: 8, cacheReadsPrice: 0.5, supportsTemperature: true, + allowedServiceTiers: ["priority"], + serviceTierPricing: { + priority: { inputPrice: 3.5, outputPrice: 14.0, cacheReadsPrice: 0.875 }, + }, }, "gpt-4.1-mini": { maxTokens: 32_768, @@ -80,6 +98,10 @@ export const openAiNativeModels = { outputPrice: 
1.6, cacheReadsPrice: 0.1, supportsTemperature: true, + allowedServiceTiers: ["priority"], + serviceTierPricing: { + priority: { inputPrice: 0.7, outputPrice: 2.8, cacheReadsPrice: 0.175 }, + }, }, "gpt-4.1-nano": { maxTokens: 32_768, @@ -90,6 +112,10 @@ export const openAiNativeModels = { outputPrice: 0.4, cacheReadsPrice: 0.025, supportsTemperature: true, + allowedServiceTiers: ["priority"], + serviceTierPricing: { + priority: { inputPrice: 0.2, outputPrice: 0.8, cacheReadsPrice: 0.05 }, + }, }, o3: { maxTokens: 100_000, @@ -102,6 +128,11 @@ export const openAiNativeModels = { supportsReasoningEffort: true, reasoningEffort: "medium", supportsTemperature: false, + allowedServiceTiers: ["flex", "priority"], + serviceTierPricing: { + flex: { inputPrice: 1.0, outputPrice: 4.0, cacheReadsPrice: 0.25 }, + priority: { inputPrice: 3.5, outputPrice: 14.0, cacheReadsPrice: 0.875 }, + }, }, "o3-high": { maxTokens: 100_000, @@ -136,6 +167,11 @@ export const openAiNativeModels = { supportsReasoningEffort: true, reasoningEffort: "medium", supportsTemperature: false, + allowedServiceTiers: ["flex", "priority"], + serviceTierPricing: { + flex: { inputPrice: 0.55, outputPrice: 2.2, cacheReadsPrice: 0.138 }, + priority: { inputPrice: 2.0, outputPrice: 8.0, cacheReadsPrice: 0.5 }, + }, }, "o4-mini-high": { maxTokens: 100_000, @@ -232,6 +268,10 @@ export const openAiNativeModels = { outputPrice: 10, cacheReadsPrice: 1.25, supportsTemperature: true, + allowedServiceTiers: ["priority"], + serviceTierPricing: { + priority: { inputPrice: 4.25, outputPrice: 17.0, cacheReadsPrice: 2.125 }, + }, }, "gpt-4o-mini": { maxTokens: 16_384, @@ -242,6 +282,10 @@ export const openAiNativeModels = { outputPrice: 0.6, cacheReadsPrice: 0.075, supportsTemperature: true, + allowedServiceTiers: ["priority"], + serviceTierPricing: { + priority: { inputPrice: 0.25, outputPrice: 1.0, cacheReadsPrice: 0.125 }, + }, }, "codex-mini-latest": { maxTokens: 16_384, diff --git a/src/api/providers/openai-native.ts 
b/src/api/providers/openai-native.ts index c884091c02a6..d7c7eb834828 100644 --- a/src/api/providers/openai-native.ts +++ b/src/api/providers/openai-native.ts @@ -11,6 +11,7 @@ import { type ReasoningEffort, type VerbosityLevel, type ReasoningEffortWithMinimal, + type ServiceTier, } from "@roo-code/types" import type { ApiHandlerOptions } from "../../shared/api" @@ -36,6 +37,8 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio private lastResponseId: string | undefined private responseIdPromise: Promise | undefined private responseIdResolver: ((value: string | undefined) => void) | undefined + // Resolved service tier from Responses API (actual tier used by OpenAI) + private lastServiceTier: ServiceTier | undefined // Event types handled by the shared event processor to avoid duplication private readonly coreHandledEventTypes = new Set([ @@ -90,10 +93,15 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio const cacheReadTokens = usage.cache_read_input_tokens ?? usage.cache_read_tokens ?? usage.cached_tokens ?? cachedFromDetails ?? 
0 + // Resolve effective tier: prefer actual tier from response; otherwise requested tier + const effectiveTier = + this.lastServiceTier || (this.options.openAiNativeServiceTier as ServiceTier | undefined) || undefined + const effectiveInfo = this.applyServiceTierPricing(model.info, effectiveTier) + // Pass total input tokens directly to calculateApiCostOpenAI // The function handles subtracting both cache reads and writes internally (see shared/cost.ts:46) const totalCost = calculateApiCostOpenAI( - model.info, + effectiveInfo, totalInputTokens, totalOutputTokens, cacheWriteTokens, @@ -146,6 +154,9 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio messages: Anthropic.Messages.MessageParam[], metadata?: ApiHandlerCreateMessageMetadata, ): ApiStream { + // Reset resolved tier for this request; will be set from response if present + this.lastServiceTier = undefined + // Use Responses API for ALL models const { verbosity, reasoning } = this.getModel() @@ -233,8 +244,13 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio previous_response_id?: string store?: boolean instructions?: string + service_tier?: ServiceTier } + // Validate requested tier against model support; if not supported, omit. + const requestedTier = (this.options.openAiNativeServiceTier as ServiceTier | undefined) || undefined + const allowedTierNames = new Set(model.info.tiers?.map((t) => t.name).filter(Boolean) || []) + const body: Gpt5RequestBody = { model: model.id, input: formattedInput, @@ -262,6 +278,11 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio // Use the per-request reserved output computed by Roo (params.maxTokens from getModelParams). ...(model.maxTokens ? 
{ max_output_tokens: model.maxTokens } : {}), ...(requestPreviousResponseId && { previous_response_id: requestPreviousResponseId }), + // Include tier when selected and supported by the model, or when explicitly "default" + ...(requestedTier && + (requestedTier === "default" || allowedTierNames.has(requestedTier)) && { + service_tier: requestedTier, + }), } // Include text.verbosity only when the model explicitly supports it @@ -636,6 +657,10 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio if (parsed.response?.id) { this.resolveResponseId(parsed.response.id) } + // Capture resolved service tier if present + if (parsed.response?.service_tier) { + this.lastServiceTier = parsed.response.service_tier as ServiceTier + } // Delegate standard event types to the shared processor to avoid duplication if (parsed?.type && this.coreHandledEventTypes.has(parsed.type)) { @@ -927,6 +952,10 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio if (parsed.response?.id) { this.resolveResponseId(parsed.response.id) } + // Capture resolved service tier if present + if (parsed.response?.service_tier) { + this.lastServiceTier = parsed.response.service_tier as ServiceTier + } // Check if the done event contains the complete output (as a fallback) if ( @@ -1051,6 +1080,10 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio if (event?.response?.id) { this.resolveResponseId(event.response.id) } + // Capture resolved service tier when available + if (event?.response?.service_tier) { + this.lastServiceTier = event.response.service_tier as ServiceTier + } // Handle known streaming text deltas if (event?.type === "response.text.delta" || event?.type === "response.output_text.delta") { @@ -1141,6 +1174,26 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio return info.reasoningEffort as ReasoningEffortWithMinimal | undefined } + /** + * Returns a shallow-cloned 
ModelInfo with pricing overridden for the given tier, if available. + * If no tier or no overrides exist, the original ModelInfo is returned. + */ + private applyServiceTierPricing(info: ModelInfo, tier?: ServiceTier): ModelInfo { + if (!tier || tier === "default") return info + + // Find the tier with matching name in the tiers array + const tierInfo = info.tiers?.find((t) => t.name === tier) + if (!tierInfo) return info + + return { + ...info, + inputPrice: tierInfo.inputPrice ?? info.inputPrice, + outputPrice: tierInfo.outputPrice ?? info.outputPrice, + cacheReadsPrice: tierInfo.cacheReadsPrice ?? info.cacheReadsPrice, + cacheWritesPrice: tierInfo.cacheWritesPrice ?? info.cacheWritesPrice, + } + } + // Removed isResponsesApiModel method as ALL models now use the Responses API override getModel() { @@ -1214,6 +1267,13 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio store: false, // Don't store prompt completions } + // Include service tier if selected and supported + const requestedTier = (this.options.openAiNativeServiceTier as ServiceTier | undefined) || undefined + const allowedTierNames = new Set(model.info.tiers?.map((t) => t.name).filter(Boolean) || []) + if (requestedTier && (requestedTier === "default" || allowedTierNames.has(requestedTier))) { + requestBody.service_tier = requestedTier + } + // Add reasoning if supported if (reasoningEffort) { requestBody.reasoning = { From ee67174dfbff80a413c2e0fd6ed7557f15eca876 Mon Sep 17 00:00:00 2001 From: Hannes Rudolph Date: Wed, 3 Sep 2025 17:51:15 -0600 Subject: [PATCH 3/6] fix: address PR review feedback for service tier implementation - Fixed documentation comment about streaming behavior (service_tier IS sent for streaming) - Added internationalization for all UI strings (Service tier, Standard, Flex, Priority, pricing table) - Fixed TypeScript type safety by using proper ServiceTier type instead of 'any' casts - All tests passing, TypeScript compilation successful --- 
.../src/components/settings/ModelInfoView.tsx | 63 +++++++++++++------ webview-ui/src/i18n/locales/en/settings.json | 16 ++++- 2 files changed, 58 insertions(+), 21 deletions(-) diff --git a/webview-ui/src/components/settings/ModelInfoView.tsx b/webview-ui/src/components/settings/ModelInfoView.tsx index 7705bcf0a11a..138a55feba97 100644 --- a/webview-ui/src/components/settings/ModelInfoView.tsx +++ b/webview-ui/src/components/settings/ModelInfoView.tsx @@ -26,10 +26,9 @@ export const ModelInfoView = ({ const { t } = useAppTranslation() // Show tiered pricing table for OpenAI Native when model supports non-standard tiers - const allowedTiers = - (modelInfo?.allowedServiceTiers || []).filter((tier) => tier === "flex" || tier === "priority") ?? [] - const tierPricing = modelInfo?.serviceTierPricing - const shouldShowTierPricingTable = apiProvider === "openai-native" && allowedTiers.length > 0 && !!tierPricing + const allowedTierNames = + modelInfo?.tiers?.filter((t) => t.name === "flex" || t.name === "priority")?.map((t) => t.name) ?? [] + const shouldShowTierPricingTable = apiProvider === "openai-native" && allowedTierNames.length > 0 const fmt = (n?: number) => (typeof n === "number" ? `${formatPrice(n)}` : "—") const baseInfoItems = [ @@ -122,50 +121,74 @@ export const ModelInfoView = ({ {shouldShowTierPricingTable && (
- Pricing by service tier (price per 1M tokens) + {t("settings:serviceTier.pricingTableTitle")}
- - - - + + + + - + - {allowedTiers.includes("flex") && ( + {allowedTierNames.includes("flex") && ( - + )} - {allowedTiers.includes("priority") && ( + {allowedTierNames.includes("priority") && ( - + )} diff --git a/webview-ui/src/i18n/locales/en/settings.json b/webview-ui/src/i18n/locales/en/settings.json index 8479be779332..1cb4b144f702 100644 --- a/webview-ui/src/i18n/locales/en/settings.json +++ b/webview-ui/src/i18n/locales/en/settings.json @@ -857,5 +857,19 @@ "includeMaxOutputTokensDescription": "Send max output tokens parameter in API requests. Some providers may not support this.", "limitMaxTokensDescription": "Limit the maximum number of tokens in the response", "maxOutputTokensLabel": "Max output tokens", - "maxTokensGenerateDescription": "Maximum tokens to generate in response" + "maxTokensGenerateDescription": "Maximum tokens to generate in response", + "serviceTier": { + "label": "Service tier", + "tooltip": "For faster processing of API requests, try the priority processing service tier. 
For lower prices with higher latency, try the flex processing tier.", + "standard": "Standard", + "flex": "Flex", + "priority": "Priority", + "pricingTableTitle": "Pricing by service tier (price per 1M tokens)", + "columns": { + "tier": "Tier", + "input": "Input", + "output": "Output", + "cacheReads": "Cache reads" + } + } } From 27e052695f8a1f4325659b33d8a42802c87b2bc3 Mon Sep 17 00:00:00 2001 From: Hannes Rudolph Date: Wed, 3 Sep 2025 17:59:37 -0600 Subject: [PATCH 4/6] fix: add service tier translations for all supported languages - Added translations for service tier UI strings to all 17 supported languages - Fixes CI check-translations failure - Ensures complete i18n coverage for the new feature --- webview-ui/src/i18n/locales/ca/settings.json | 16 +++++++++++++++- webview-ui/src/i18n/locales/de/settings.json | 16 +++++++++++++++- webview-ui/src/i18n/locales/es/settings.json | 16 +++++++++++++++- webview-ui/src/i18n/locales/fr/settings.json | 16 +++++++++++++++- webview-ui/src/i18n/locales/hi/settings.json | 16 +++++++++++++++- webview-ui/src/i18n/locales/id/settings.json | 16 +++++++++++++++- webview-ui/src/i18n/locales/it/settings.json | 16 +++++++++++++++- webview-ui/src/i18n/locales/ja/settings.json | 16 +++++++++++++++- webview-ui/src/i18n/locales/ko/settings.json | 16 +++++++++++++++- webview-ui/src/i18n/locales/nl/settings.json | 16 +++++++++++++++- webview-ui/src/i18n/locales/pl/settings.json | 16 +++++++++++++++- webview-ui/src/i18n/locales/pt-BR/settings.json | 16 +++++++++++++++- webview-ui/src/i18n/locales/ru/settings.json | 16 +++++++++++++++- webview-ui/src/i18n/locales/tr/settings.json | 16 +++++++++++++++- webview-ui/src/i18n/locales/vi/settings.json | 16 +++++++++++++++- webview-ui/src/i18n/locales/zh-CN/settings.json | 16 +++++++++++++++- webview-ui/src/i18n/locales/zh-TW/settings.json | 16 +++++++++++++++- 17 files changed, 255 insertions(+), 17 deletions(-) diff --git a/webview-ui/src/i18n/locales/ca/settings.json 
b/webview-ui/src/i18n/locales/ca/settings.json index 52d1ce9e451f..d785ed5fe0b6 100644 --- a/webview-ui/src/i18n/locales/ca/settings.json +++ b/webview-ui/src/i18n/locales/ca/settings.json @@ -858,5 +858,19 @@ "includeMaxOutputTokensDescription": "Enviar el paràmetre de tokens màxims de sortida a les sol·licituds API. Alguns proveïdors poden no admetre això.", "limitMaxTokensDescription": "Limitar el nombre màxim de tokens en la resposta", "maxOutputTokensLabel": "Tokens màxims de sortida", - "maxTokensGenerateDescription": "Tokens màxims a generar en la resposta" + "maxTokensGenerateDescription": "Tokens màxims a generar en la resposta", + "serviceTier": { + "label": "Nivell de servei", + "tooltip": "Per a un processament més ràpid de les sol·licituds de l'API, proveu el nivell de servei de processament prioritari. Per a preus més baixos amb una latència més alta, proveu el nivell de processament flexible.", + "standard": "Estàndard", + "flex": "Flex", + "priority": "Prioritat", + "pricingTableTitle": "Preus per nivell de servei (preu per 1M de fitxes)", + "columns": { + "tier": "Nivell", + "input": "Entrada", + "output": "Sortida", + "cacheReads": "Lectures de memòria cau" + } + } } diff --git a/webview-ui/src/i18n/locales/de/settings.json b/webview-ui/src/i18n/locales/de/settings.json index 870e587ac9af..6648b6e6702e 100644 --- a/webview-ui/src/i18n/locales/de/settings.json +++ b/webview-ui/src/i18n/locales/de/settings.json @@ -858,5 +858,19 @@ "includeMaxOutputTokensDescription": "Senden Sie den Parameter für maximale Ausgabe-Tokens in API-Anfragen. 
Einige Anbieter unterstützen dies möglicherweise nicht.", "limitMaxTokensDescription": "Begrenze die maximale Anzahl von Tokens in der Antwort", "maxOutputTokensLabel": "Maximale Ausgabe-Tokens", - "maxTokensGenerateDescription": "Maximale Tokens, die in der Antwort generiert werden" + "maxTokensGenerateDescription": "Maximale Tokens, die in der Antwort generiert werden", + "serviceTier": { + "label": "Service-Stufe", + "tooltip": "Für eine schnellere Verarbeitung von API-Anfragen, probiere die Prioritäts-Verarbeitungsstufe. Für niedrigere Preise bei höherer Latenz, probiere die Flex-Verarbeitungsstufe.", + "standard": "Standard", + "flex": "Flex", + "priority": "Priorität", + "pricingTableTitle": "Preise nach Service-Stufe (Preis pro 1 Mio. Token)", + "columns": { + "tier": "Stufe", + "input": "Eingabe", + "output": "Ausgabe", + "cacheReads": "Cache-Lesevorgänge" + } + } } diff --git a/webview-ui/src/i18n/locales/es/settings.json b/webview-ui/src/i18n/locales/es/settings.json index cb4c571cb468..c1174cbf0f89 100644 --- a/webview-ui/src/i18n/locales/es/settings.json +++ b/webview-ui/src/i18n/locales/es/settings.json @@ -858,5 +858,19 @@ "includeMaxOutputTokensDescription": "Enviar parámetro de tokens máximos de salida en solicitudes API. Algunos proveedores pueden no soportar esto.", "limitMaxTokensDescription": "Limitar el número máximo de tokens en la respuesta", "maxOutputTokensLabel": "Tokens máximos de salida", - "maxTokensGenerateDescription": "Tokens máximos a generar en la respuesta" + "maxTokensGenerateDescription": "Tokens máximos a generar en la respuesta", + "serviceTier": { + "label": "Nivel de servicio", + "tooltip": "Para un procesamiento más rápido de las solicitudes de API, prueba el nivel de servicio de procesamiento prioritario. 
Para precios más bajos con mayor latencia, prueba el nivel de procesamiento flexible.", + "standard": "Estándar", + "flex": "Flexible", + "priority": "Prioridad", + "pricingTableTitle": "Precios por nivel de servicio (precio por 1M de tokens)", + "columns": { + "tier": "Nivel", + "input": "Entrada", + "output": "Salida", + "cacheReads": "Lecturas de caché" + } + } } diff --git a/webview-ui/src/i18n/locales/fr/settings.json b/webview-ui/src/i18n/locales/fr/settings.json index 3b9f7cab5b83..5cfd4d005ff2 100644 --- a/webview-ui/src/i18n/locales/fr/settings.json +++ b/webview-ui/src/i18n/locales/fr/settings.json @@ -858,5 +858,19 @@ "includeMaxOutputTokensDescription": "Envoyer le paramètre de tokens de sortie maximum dans les requêtes API. Certains fournisseurs peuvent ne pas supporter cela.", "limitMaxTokensDescription": "Limiter le nombre maximum de tokens dans la réponse", "maxOutputTokensLabel": "Tokens de sortie maximum", - "maxTokensGenerateDescription": "Tokens maximum à générer dans la réponse" + "maxTokensGenerateDescription": "Tokens maximum à générer dans la réponse", + "serviceTier": { + "label": "Niveau de service", + "tooltip": "Pour un traitement plus rapide des demandes d'API, essayez le niveau de service de traitement prioritaire. 
Pour des prix plus bas avec une latence plus élevée, essayez le niveau de traitement flexible.", + "standard": "Standard", + "flex": "Flexible", + "priority": "Priorité", + "pricingTableTitle": "Tarification par niveau de service (prix par 1M de tokens)", + "columns": { + "tier": "Niveau", + "input": "Entrée", + "output": "Sortie", + "cacheReads": "Lectures du cache" + } + } } diff --git a/webview-ui/src/i18n/locales/hi/settings.json b/webview-ui/src/i18n/locales/hi/settings.json index b2715c5df46b..bb5cf6f6c40d 100644 --- a/webview-ui/src/i18n/locales/hi/settings.json +++ b/webview-ui/src/i18n/locales/hi/settings.json @@ -859,5 +859,19 @@ "includeMaxOutputTokensDescription": "API अनुरोधों में अधिकतम आउटपुट टोकन पैरामीटर भेजें। कुछ प्रदाता इसका समर्थन नहीं कर सकते हैं।", "limitMaxTokensDescription": "प्रतिक्रिया में टोकन की अधिकतम संख्या सीमित करें", "maxOutputTokensLabel": "अधिकतम आउटपुट टोकन", - "maxTokensGenerateDescription": "प्रतिक्रिया में उत्पन्न करने के लिए अधिकतम टोकन" + "maxTokensGenerateDescription": "प्रतिक्रिया में उत्पन्न करने के लिए अधिकतम टोकन", + "serviceTier": { + "label": "सेवा स्तर", + "tooltip": "API अनुरोधों के तेज़ प्रसंस्करण के लिए, प्राथमिकता प्रसंस्करण सेवा स्तर का प्रयास करें। उच्च विलंबता के साथ कम कीमतों के लिए, फ्लेक्स प्रसंस्करण स्तर का प्रयास करें।", + "standard": "मानक", + "flex": "फ्लेक्स", + "priority": "प्राथमिकता", + "pricingTableTitle": "सेवा स्तर के अनुसार मूल्य निर्धारण (प्रति 1M टोकन मूल्य)", + "columns": { + "tier": "स्तर", + "input": "इनपुट", + "output": "आउटपुट", + "cacheReads": "कैश रीड" + } + } } diff --git a/webview-ui/src/i18n/locales/id/settings.json b/webview-ui/src/i18n/locales/id/settings.json index 35b558ce3dac..93225bab1e31 100644 --- a/webview-ui/src/i18n/locales/id/settings.json +++ b/webview-ui/src/i18n/locales/id/settings.json @@ -888,5 +888,19 @@ "includeMaxOutputTokensDescription": "Kirim parameter token output maksimum dalam permintaan API. 
Beberapa provider mungkin tidak mendukung ini.", "limitMaxTokensDescription": "Batasi jumlah maksimum token dalam respons", "maxOutputTokensLabel": "Token output maksimum", - "maxTokensGenerateDescription": "Token maksimum untuk dihasilkan dalam respons" + "maxTokensGenerateDescription": "Token maksimum untuk dihasilkan dalam respons", + "serviceTier": { + "label": "Tingkat layanan", + "tooltip": "Untuk pemrosesan permintaan API yang lebih cepat, coba tingkat layanan pemrosesan prioritas. Untuk harga lebih rendah dengan latensi lebih tinggi, coba tingkat pemrosesan fleksibel.", + "standard": "Standar", + "flex": "Fleksibel", + "priority": "Prioritas", + "pricingTableTitle": "Harga berdasarkan tingkat layanan (harga per 1 juta token)", + "columns": { + "tier": "Tingkat", + "input": "Input", + "output": "Output", + "cacheReads": "Pembacaan cache" + } + } } diff --git a/webview-ui/src/i18n/locales/it/settings.json b/webview-ui/src/i18n/locales/it/settings.json index e430d9f6a8e9..b8487b01dd9c 100644 --- a/webview-ui/src/i18n/locales/it/settings.json +++ b/webview-ui/src/i18n/locales/it/settings.json @@ -859,5 +859,19 @@ "includeMaxOutputTokensDescription": "Invia il parametro dei token di output massimi nelle richieste API. Alcuni provider potrebbero non supportarlo.", "limitMaxTokensDescription": "Limita il numero massimo di token nella risposta", "maxOutputTokensLabel": "Token di output massimi", - "maxTokensGenerateDescription": "Token massimi da generare nella risposta" + "maxTokensGenerateDescription": "Token massimi da generare nella risposta", + "serviceTier": { + "label": "Livello di servizio", + "tooltip": "Per un'elaborazione più rapida delle richieste API, prova il livello di servizio di elaborazione prioritaria. 
Per prezzi più bassi con una latenza maggiore, prova il livello di elaborazione flessibile.", + "standard": "Standard", + "flex": "Flessibile", + "priority": "Priorità", + "pricingTableTitle": "Prezzi per livello di servizio (prezzo per 1 milione di token)", + "columns": { + "tier": "Livello", + "input": "Input", + "output": "Output", + "cacheReads": "Letture cache" + } + } } diff --git a/webview-ui/src/i18n/locales/ja/settings.json b/webview-ui/src/i18n/locales/ja/settings.json index 185abf598a79..d8b9d6482f16 100644 --- a/webview-ui/src/i18n/locales/ja/settings.json +++ b/webview-ui/src/i18n/locales/ja/settings.json @@ -859,5 +859,19 @@ "includeMaxOutputTokensDescription": "APIリクエストで最大出力トークンパラメータを送信します。一部のプロバイダーはこれをサポートしていない場合があります。", "limitMaxTokensDescription": "レスポンスの最大トークン数を制限する", "maxOutputTokensLabel": "最大出力トークン", - "maxTokensGenerateDescription": "レスポンスで生成する最大トークン数" + "maxTokensGenerateDescription": "レスポンスで生成する最大トークン数", + "serviceTier": { + "label": "サービスティア", + "tooltip": "APIリクエストをより速く処理するには、優先処理サービスティアをお試しください。低価格でレイテンシが高い場合は、フレックス処理ティアをお試しください。", + "standard": "標準", + "flex": "フレックス", + "priority": "優先", + "pricingTableTitle": "サービスティア別料金(100万トークンあたりの価格)", + "columns": { + "tier": "ティア", + "input": "入力", + "output": "出力", + "cacheReads": "キャッシュ読み取り" + } + } } diff --git a/webview-ui/src/i18n/locales/ko/settings.json b/webview-ui/src/i18n/locales/ko/settings.json index 794896e464ec..6b8cd0d2c98d 100644 --- a/webview-ui/src/i18n/locales/ko/settings.json +++ b/webview-ui/src/i18n/locales/ko/settings.json @@ -859,5 +859,19 @@ "includeMaxOutputTokensDescription": "API 요청에서 최대 출력 토큰 매개변수를 전송합니다. 일부 제공업체는 이를 지원하지 않을 수 있습니다.", "limitMaxTokensDescription": "응답에서 최대 토큰 수 제한", "maxOutputTokensLabel": "최대 출력 토큰", - "maxTokensGenerateDescription": "응답에서 생성할 최대 토큰 수" + "maxTokensGenerateDescription": "응답에서 생성할 최대 토큰 수", + "serviceTier": { + "label": "서비스 등급", + "tooltip": "API 요청을 더 빠르게 처리하려면 우선 처리 서비스 등급을 사용해 보세요. 
더 낮은 가격에 더 높은 지연 시간을 원하시면 플렉스 처리 등급을 사용해 보세요.", + "standard": "표준", + "flex": "플렉스", + "priority": "우선", + "pricingTableTitle": "서비스 등급별 가격 (100만 토큰당 가격)", + "columns": { + "tier": "등급", + "input": "입력", + "output": "출력", + "cacheReads": "캐시 읽기" + } + } } diff --git a/webview-ui/src/i18n/locales/nl/settings.json b/webview-ui/src/i18n/locales/nl/settings.json index f77717b38e2f..7e9da9b11af1 100644 --- a/webview-ui/src/i18n/locales/nl/settings.json +++ b/webview-ui/src/i18n/locales/nl/settings.json @@ -859,5 +859,19 @@ "includeMaxOutputTokensDescription": "Stuur maximale output tokens parameter in API-verzoeken. Sommige providers ondersteunen dit mogelijk niet.", "limitMaxTokensDescription": "Beperk het maximale aantal tokens in het antwoord", "maxOutputTokensLabel": "Maximale output tokens", - "maxTokensGenerateDescription": "Maximale tokens om te genereren in het antwoord" + "maxTokensGenerateDescription": "Maximale tokens om te genereren in het antwoord", + "serviceTier": { + "label": "Serviceniveau", + "tooltip": "Voor snellere verwerking van API-verzoeken, probeer het prioriteitsverwerkingsniveau. Voor lagere prijzen met hogere latentie, probeer het flexverwerkingsniveau.", + "standard": "Standaard", + "flex": "Flex", + "priority": "Prioriteit", + "pricingTableTitle": "Prijzen per serviceniveau (prijs per 1M tokens)", + "columns": { + "tier": "Niveau", + "input": "Invoer", + "output": "Uitvoer", + "cacheReads": "Cache leest" + } + } } diff --git a/webview-ui/src/i18n/locales/pl/settings.json b/webview-ui/src/i18n/locales/pl/settings.json index a09c276fd1ae..c9aa603d2faa 100644 --- a/webview-ui/src/i18n/locales/pl/settings.json +++ b/webview-ui/src/i18n/locales/pl/settings.json @@ -859,5 +859,19 @@ "includeMaxOutputTokensDescription": "Wyślij parametr maksymalnych tokenów wyjściowych w żądaniach API. 
Niektórzy dostawcy mogą tego nie obsługiwać.", "limitMaxTokensDescription": "Ogranicz maksymalną liczbę tokenów w odpowiedzi", "maxOutputTokensLabel": "Maksymalne tokeny wyjściowe", - "maxTokensGenerateDescription": "Maksymalne tokeny do wygenerowania w odpowiedzi" + "maxTokensGenerateDescription": "Maksymalne tokeny do wygenerowania w odpowiedzi", + "serviceTier": { + "label": "Poziom usług", + "tooltip": "Aby szybciej przetwarzać żądania API, wypróbuj priorytetowy poziom usług. Aby uzyskać niższe ceny przy wyższej latencji, wypróbuj elastyczny poziom usług.", + "standard": "Standardowy", + "flex": "Elastyczny", + "priority": "Priorytetowy", + "pricingTableTitle": "Cennik według poziomu usług (cena za 1 mln tokenów)", + "columns": { + "tier": "Poziom", + "input": "Wejście", + "output": "Wyjście", + "cacheReads": "Odczyty z pamięci podręcznej" + } + } } diff --git a/webview-ui/src/i18n/locales/pt-BR/settings.json b/webview-ui/src/i18n/locales/pt-BR/settings.json index e23dfe8e7b5d..0fbb47d34875 100644 --- a/webview-ui/src/i18n/locales/pt-BR/settings.json +++ b/webview-ui/src/i18n/locales/pt-BR/settings.json @@ -859,5 +859,19 @@ "includeMaxOutputTokensDescription": "Enviar parâmetro de tokens máximos de saída nas solicitações de API. Alguns provedores podem não suportar isso.", "limitMaxTokensDescription": "Limitar o número máximo de tokens na resposta", "maxOutputTokensLabel": "Tokens máximos de saída", - "maxTokensGenerateDescription": "Tokens máximos para gerar na resposta" + "maxTokensGenerateDescription": "Tokens máximos para gerar na resposta", + "serviceTier": { + "label": "Nível de serviço", + "tooltip": "Para um processamento mais rápido das solicitações de API, experimente o nível de serviço de processamento prioritário. 
Para preços mais baixos com maior latência, experimente o nível de processamento flexível.", + "standard": "Padrão", + "flex": "Flexível", + "priority": "Prioritário", + "pricingTableTitle": "Preços por nível de serviço (preço por 1 milhão de tokens)", + "columns": { + "tier": "Nível", + "input": "Entrada", + "output": "Saída", + "cacheReads": "Leituras de cache" + } + } } diff --git a/webview-ui/src/i18n/locales/ru/settings.json b/webview-ui/src/i18n/locales/ru/settings.json index 900996b569ac..24b09ab6c1b9 100644 --- a/webview-ui/src/i18n/locales/ru/settings.json +++ b/webview-ui/src/i18n/locales/ru/settings.json @@ -859,5 +859,19 @@ "includeMaxOutputTokensDescription": "Отправлять параметр максимальных выходных токенов в API-запросах. Некоторые провайдеры могут не поддерживать это.", "limitMaxTokensDescription": "Ограничить максимальное количество токенов в ответе", "maxOutputTokensLabel": "Максимальные выходные токены", - "maxTokensGenerateDescription": "Максимальные токены для генерации в ответе" + "maxTokensGenerateDescription": "Максимальные токены для генерации в ответе", + "serviceTier": { + "label": "Уровень обслуживания", + "tooltip": "Для более быстрой обработки запросов API попробуйте уровень обслуживания с приоритетной обработкой. Для более низких цен с более высокой задержкой попробуйте уровень гибкой обработки.", + "standard": "Стандартный", + "flex": "Гибкий", + "priority": "Приоритетный", + "pricingTableTitle": "Цены по уровням обслуживания (цена за 1 млн токенов)", + "columns": { + "tier": "Уровень", + "input": "Вход", + "output": "Выход", + "cacheReads": "Чтения из кэша" + } + } } diff --git a/webview-ui/src/i18n/locales/tr/settings.json b/webview-ui/src/i18n/locales/tr/settings.json index e34f6c08b9be..91e5b3e9d02c 100644 --- a/webview-ui/src/i18n/locales/tr/settings.json +++ b/webview-ui/src/i18n/locales/tr/settings.json @@ -859,5 +859,19 @@ "includeMaxOutputTokensDescription": "API isteklerinde maksimum çıktı token parametresini gönder. 
Bazı sağlayıcılar bunu desteklemeyebilir.", 	"limitMaxTokensDescription": "Yanıttaki maksimum token sayısını sınırla", 	"maxOutputTokensLabel": "Maksimum çıktı tokenları", -	"maxTokensGenerateDescription": "Yanıtta oluşturulacak maksimum token sayısı" +	"maxTokensGenerateDescription": "Yanıtta oluşturulacak maksimum token sayısı", +	"serviceTier": { +		"label": "Hizmet seviyesi", +		"tooltip": "Daha hızlı API isteği işleme için öncelikli işleme hizmeti seviyesini deneyin. Daha yüksek gecikme süresiyle daha düşük fiyatlar için esnek işleme seviyesini deneyin.", +		"standard": "Standart", +		"flex": "Esnek", +		"priority": "Öncelik", +		"pricingTableTitle": "Hizmet seviyesine göre fiyatlandırma (1 milyon token başına fiyat)", +		"columns": { +			"tier": "Seviye", +			"input": "Giriş", +			"output": "Çıkış", +			"cacheReads": "Önbellek okumaları" +		} +	} } diff --git a/webview-ui/src/i18n/locales/vi/settings.json b/webview-ui/src/i18n/locales/vi/settings.json index 8348156569d4..c6fdea7841c4 100644 --- a/webview-ui/src/i18n/locales/vi/settings.json +++ b/webview-ui/src/i18n/locales/vi/settings.json @@ -859,5 +859,19 @@ 	"includeMaxOutputTokensDescription": "Gửi tham số token đầu ra tối đa trong các yêu cầu API. Một số nhà cung cấp có thể không hỗ trợ điều này.", 	"limitMaxTokensDescription": "Giới hạn số lượng token tối đa trong phản hồi", 	"maxOutputTokensLabel": "Token đầu ra tối đa", -	"maxTokensGenerateDescription": "Token tối đa để tạo trong phản hồi" +	"maxTokensGenerateDescription": "Token tối đa để tạo trong phản hồi", +	"serviceTier": { +		"label": "Cấp độ dịch vụ", +		"tooltip": "Để xử lý các yêu cầu API nhanh hơn, hãy thử cấp độ dịch vụ xử lý ưu tiên. 
Để có giá thấp hơn với độ trễ cao hơn, hãy thử cấp độ xử lý linh hoạt.", + "standard": "Tiêu chuẩn", + "flex": "Linh hoạt", + "priority": "Ưu tiên", + "pricingTableTitle": "Giá theo cấp độ dịch vụ (giá mỗi 1 triệu token)", + "columns": { + "tier": "Cấp độ", + "input": "Đầu vào", + "output": "Đầu ra", + "cacheReads": "Lượt đọc bộ nhớ đệm" + } + } } diff --git a/webview-ui/src/i18n/locales/zh-CN/settings.json b/webview-ui/src/i18n/locales/zh-CN/settings.json index 69e13dbfdfd0..c8ca284c04b6 100644 --- a/webview-ui/src/i18n/locales/zh-CN/settings.json +++ b/webview-ui/src/i18n/locales/zh-CN/settings.json @@ -859,5 +859,19 @@ "includeMaxOutputTokensDescription": "在 API 请求中发送最大输出 Token 参数。某些提供商可能不支持此功能。", "limitMaxTokensDescription": "限制响应中的最大 Token 数量", "maxOutputTokensLabel": "最大输出 Token 数", - "maxTokensGenerateDescription": "响应中生成的最大 Token 数" + "maxTokensGenerateDescription": "响应中生成的最大 Token 数", + "serviceTier": { + "label": "服务等级", + "tooltip": "为加快API请求处理速度,请尝试优先处理服务等级。为获得更低价格但延迟较高,请尝试灵活处理等级。", + "standard": "标准", + "flex": "灵活", + "priority": "优先", + "pricingTableTitle": "按服务等级定价 (每百万Token价格)", + "columns": { + "tier": "等级", + "input": "输入", + "output": "输出", + "cacheReads": "缓存读取" + } + } } diff --git a/webview-ui/src/i18n/locales/zh-TW/settings.json b/webview-ui/src/i18n/locales/zh-TW/settings.json index 68561695d76d..8163cce20fd3 100644 --- a/webview-ui/src/i18n/locales/zh-TW/settings.json +++ b/webview-ui/src/i18n/locales/zh-TW/settings.json @@ -859,5 +859,19 @@ "includeMaxOutputTokensDescription": "在 API 請求中傳送最大輸出 Token 參數。某些提供商可能不支援此功能。", "limitMaxTokensDescription": "限制回應中的最大 Token 數量", "maxOutputTokensLabel": "最大輸出 Token 數", - "maxTokensGenerateDescription": "回應中產生的最大 Token 數" + "maxTokensGenerateDescription": "回應中產生的最大 Token 數", + "serviceTier": { + "label": "服務層級", + "tooltip": "若需更快的 API 請求處理,請嘗試優先處理服務層級。若需較低價格但延遲較高,請嘗試彈性處理層級。", + "standard": "標準", + "flex": "彈性", + "priority": "優先", + "pricingTableTitle": "按服務層級定價(每百萬 Token 價格)", + "columns": { + 
"tier": "層級", + "input": "輸入", + "output": "輸出", + "cacheReads": "快取讀取" + } + } } From d9ec2484642e1fd830a9240ead1141e6cea46179 Mon Sep 17 00:00:00 2001 From: Daniel Riccio Date: Thu, 4 Sep 2025 12:46:22 -0500 Subject: [PATCH 5/6] fix: update OpenAI provider to use tiers array format for service tier pricing --- packages/types/src/providers/openai.ts | 76 +++++++++++--------------- 1 file changed, 32 insertions(+), 44 deletions(-) diff --git a/packages/types/src/providers/openai.ts b/packages/types/src/providers/openai.ts index 37d6dc800aed..028027baad61 100644 --- a/packages/types/src/providers/openai.ts +++ b/packages/types/src/providers/openai.ts @@ -32,11 +32,10 @@ export const openAiNativeModels = { // supportsVerbosity is a new capability; ensure ModelInfo includes it supportsVerbosity: true, supportsTemperature: false, - allowedServiceTiers: ["flex", "priority"], - serviceTierPricing: { - flex: { inputPrice: 0.625, outputPrice: 5.0, cacheReadsPrice: 0.0625 }, - priority: { inputPrice: 2.5, outputPrice: 20.0, cacheReadsPrice: 0.25 }, - }, + tiers: [ + { name: "flex", contextWindow: 400000, inputPrice: 0.625, outputPrice: 5.0, cacheReadsPrice: 0.0625 }, + { name: "priority", contextWindow: 400000, inputPrice: 2.5, outputPrice: 20.0, cacheReadsPrice: 0.25 }, + ], }, "gpt-5-mini-2025-08-07": { maxTokens: 128000, @@ -51,11 +50,10 @@ export const openAiNativeModels = { description: "GPT-5 Mini: A faster, more cost-efficient version of GPT-5 for well-defined tasks", supportsVerbosity: true, supportsTemperature: false, - allowedServiceTiers: ["flex", "priority"], - serviceTierPricing: { - flex: { inputPrice: 0.125, outputPrice: 1.0, cacheReadsPrice: 0.0125 }, - priority: { inputPrice: 0.45, outputPrice: 3.6, cacheReadsPrice: 0.045 }, - }, + tiers: [ + { name: "flex", contextWindow: 400000, inputPrice: 0.125, outputPrice: 1.0, cacheReadsPrice: 0.0125 }, + { name: "priority", contextWindow: 400000, inputPrice: 0.45, outputPrice: 3.6, cacheReadsPrice: 0.045 }, + ], }, 
"gpt-5-nano-2025-08-07": { maxTokens: 128000, @@ -70,10 +68,7 @@ export const openAiNativeModels = { description: "GPT-5 Nano: Fastest, most cost-efficient version of GPT-5", supportsVerbosity: true, supportsTemperature: false, - allowedServiceTiers: ["flex"], - serviceTierPricing: { - flex: { inputPrice: 0.025, outputPrice: 0.2, cacheReadsPrice: 0.0025 }, - }, + tiers: [{ name: "flex", contextWindow: 400000, inputPrice: 0.025, outputPrice: 0.2, cacheReadsPrice: 0.0025 }], }, "gpt-4.1": { maxTokens: 32_768, @@ -84,10 +79,9 @@ export const openAiNativeModels = { outputPrice: 8, cacheReadsPrice: 0.5, supportsTemperature: true, - allowedServiceTiers: ["priority"], - serviceTierPricing: { - priority: { inputPrice: 3.5, outputPrice: 14.0, cacheReadsPrice: 0.875 }, - }, + tiers: [ + { name: "priority", contextWindow: 1_047_576, inputPrice: 3.5, outputPrice: 14.0, cacheReadsPrice: 0.875 }, + ], }, "gpt-4.1-mini": { maxTokens: 32_768, @@ -98,10 +92,9 @@ export const openAiNativeModels = { outputPrice: 1.6, cacheReadsPrice: 0.1, supportsTemperature: true, - allowedServiceTiers: ["priority"], - serviceTierPricing: { - priority: { inputPrice: 0.7, outputPrice: 2.8, cacheReadsPrice: 0.175 }, - }, + tiers: [ + { name: "priority", contextWindow: 1_047_576, inputPrice: 0.7, outputPrice: 2.8, cacheReadsPrice: 0.175 }, + ], }, "gpt-4.1-nano": { maxTokens: 32_768, @@ -112,10 +105,9 @@ export const openAiNativeModels = { outputPrice: 0.4, cacheReadsPrice: 0.025, supportsTemperature: true, - allowedServiceTiers: ["priority"], - serviceTierPricing: { - priority: { inputPrice: 0.2, outputPrice: 0.8, cacheReadsPrice: 0.05 }, - }, + tiers: [ + { name: "priority", contextWindow: 1_047_576, inputPrice: 0.2, outputPrice: 0.8, cacheReadsPrice: 0.05 }, + ], }, o3: { maxTokens: 100_000, @@ -128,11 +120,10 @@ export const openAiNativeModels = { supportsReasoningEffort: true, reasoningEffort: "medium", supportsTemperature: false, - allowedServiceTiers: ["flex", "priority"], - serviceTierPricing: 
{ - flex: { inputPrice: 1.0, outputPrice: 4.0, cacheReadsPrice: 0.25 }, - priority: { inputPrice: 3.5, outputPrice: 14.0, cacheReadsPrice: 0.875 }, - }, + tiers: [ + { name: "flex", contextWindow: 200_000, inputPrice: 1.0, outputPrice: 4.0, cacheReadsPrice: 0.25 }, + { name: "priority", contextWindow: 200_000, inputPrice: 3.5, outputPrice: 14.0, cacheReadsPrice: 0.875 }, + ], }, "o3-high": { maxTokens: 100_000, @@ -167,11 +158,10 @@ export const openAiNativeModels = { supportsReasoningEffort: true, reasoningEffort: "medium", supportsTemperature: false, - allowedServiceTiers: ["flex", "priority"], - serviceTierPricing: { - flex: { inputPrice: 0.55, outputPrice: 2.2, cacheReadsPrice: 0.138 }, - priority: { inputPrice: 2.0, outputPrice: 8.0, cacheReadsPrice: 0.5 }, - }, + tiers: [ + { name: "flex", contextWindow: 200_000, inputPrice: 0.55, outputPrice: 2.2, cacheReadsPrice: 0.138 }, + { name: "priority", contextWindow: 200_000, inputPrice: 2.0, outputPrice: 8.0, cacheReadsPrice: 0.5 }, + ], }, "o4-mini-high": { maxTokens: 100_000, @@ -268,10 +258,9 @@ export const openAiNativeModels = { outputPrice: 10, cacheReadsPrice: 1.25, supportsTemperature: true, - allowedServiceTiers: ["priority"], - serviceTierPricing: { - priority: { inputPrice: 4.25, outputPrice: 17.0, cacheReadsPrice: 2.125 }, - }, + tiers: [ + { name: "priority", contextWindow: 128_000, inputPrice: 4.25, outputPrice: 17.0, cacheReadsPrice: 2.125 }, + ], }, "gpt-4o-mini": { maxTokens: 16_384, @@ -282,10 +271,9 @@ export const openAiNativeModels = { outputPrice: 0.6, cacheReadsPrice: 0.075, supportsTemperature: true, - allowedServiceTiers: ["priority"], - serviceTierPricing: { - priority: { inputPrice: 0.25, outputPrice: 1.0, cacheReadsPrice: 0.125 }, - }, + tiers: [ + { name: "priority", contextWindow: 128_000, inputPrice: 0.25, outputPrice: 1.0, cacheReadsPrice: 0.125 }, + ], }, "codex-mini-latest": { maxTokens: 16_384, From 6ca429df5f9f6c808a8ced90999e4302e04c99c6 Mon Sep 17 00:00:00 2001 From: Daniel 
Riccio Date: Thu, 4 Sep 2025 12:50:26 -0500 Subject: [PATCH 6/6] fix: update OpenAI component to use tiers array for service tier selection --- webview-ui/src/components/settings/providers/OpenAI.tsx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/webview-ui/src/components/settings/providers/OpenAI.tsx b/webview-ui/src/components/settings/providers/OpenAI.tsx index 666d786e1f2b..59b907c45a52 100644 --- a/webview-ui/src/components/settings/providers/OpenAI.tsx +++ b/webview-ui/src/components/settings/providers/OpenAI.tsx @@ -76,7 +76,7 @@ export const OpenAI = ({ apiConfiguration, setApiConfigurationField, selectedMod )} {(() => { - const allowedTiers = (selectedModelInfo?.allowedServiceTiers || []).filter( + const allowedTiers = (selectedModelInfo?.tiers?.map((t) => t.name).filter(Boolean) || []).filter( (t) => t === "flex" || t === "priority", ) if (allowedTiers.length === 0) return null
TierInputOutputCache reads{t("settings:serviceTier.columns.tier")} + {t("settings:serviceTier.columns.input")} + + {t("settings:serviceTier.columns.output")} + + {t("settings:serviceTier.columns.cacheReads")} +
Standard{t("settings:serviceTier.standard")} {fmt(modelInfo?.inputPrice)} {fmt(modelInfo?.outputPrice)} {fmt(modelInfo?.cacheReadsPrice)}
Flex{t("settings:serviceTier.flex")} - {fmt(tierPricing?.flex?.inputPrice ?? modelInfo?.inputPrice)} + {fmt( + modelInfo?.tiers?.find((t) => t.name === "flex")?.inputPrice ?? + modelInfo?.inputPrice, + )} - {fmt(tierPricing?.flex?.outputPrice ?? modelInfo?.outputPrice)} + {fmt( + modelInfo?.tiers?.find((t) => t.name === "flex")?.outputPrice ?? + modelInfo?.outputPrice, + )} - {fmt(tierPricing?.flex?.cacheReadsPrice ?? modelInfo?.cacheReadsPrice)} + {fmt( + modelInfo?.tiers?.find((t) => t.name === "flex")?.cacheReadsPrice ?? + modelInfo?.cacheReadsPrice, + )}
Priority{t("settings:serviceTier.priority")} - {fmt(tierPricing?.priority?.inputPrice ?? modelInfo?.inputPrice)} + {fmt( + modelInfo?.tiers?.find((t) => t.name === "priority")?.inputPrice ?? + modelInfo?.inputPrice, + )} - {fmt(tierPricing?.priority?.outputPrice ?? modelInfo?.outputPrice)} + {fmt( + modelInfo?.tiers?.find((t) => t.name === "priority")?.outputPrice ?? + modelInfo?.outputPrice, + )} - {fmt(tierPricing?.priority?.cacheReadsPrice ?? modelInfo?.cacheReadsPrice)} + {fmt( + modelInfo?.tiers?.find((t) => t.name === "priority")?.cacheReadsPrice ?? + modelInfo?.cacheReadsPrice, + )}