diff --git a/python/samples/concepts/setup/ALL_SETTINGS.md b/python/samples/concepts/setup/ALL_SETTINGS.md
index ea7f911a5bb1..cb7ea81af670 100644
--- a/python/samples/concepts/setup/ALL_SETTINGS.md
+++ b/python/samples/concepts/setup/ALL_SETTINGS.md
@@ -30,7 +30,8 @@
 | | [VertexAITextEmbedding](../../../semantic_kernel/connectors/ai/google/google_ai/services/google_ai_text_embedding.py) | project_id, <br> region, <br> embedding_model_id | VERTEX_AI_PROJECT_ID, <br> VERTEX_AI_REGION, <br> VERTEX_AI_EMBEDDING_MODEL_ID | Yes, <br> No, <br> Yes | |
 | HuggingFace | [HuggingFaceTextCompletion](../../../semantic_kernel/connectors/ai/hugging_face/services/hf_text_completion.py) | ai_model_id | N/A | Yes | |
 | | [HuggingFaceTextEmbedding](../../../semantic_kernel/connectors/ai/hugging_face/services/hf_text_embedding.py) | ai_model_id | N/A | Yes | |
-| NVIDIA NIM | [NvidiaTextEmbedding](../../../semantic_kernel/connectors/ai/nvidia/services/nvidia_text_embedding.py) | ai_model_id, <br> api_key, <br> base_url | NVIDIA_API_KEY, <br> NVIDIA_TEXT_EMBEDDING_MODEL_ID, <br> NVIDIA_BASE_URL | Yes | [NvidiaAISettings](../../../semantic_kernel/connectors/ai/nvidia/settings/nvidia_settings.py) |
+| NVIDIA NIM | [NvidiaChatCompletion](../../../semantic_kernel/connectors/ai/nvidia/services/nvidia_chat_completion.py) | ai_model_id, <br> api_key, <br> base_url | NVIDIA_CHAT_MODEL_ID, <br> NVIDIA_API_KEY, <br> NVIDIA_BASE_URL | No (default: meta/llama-3.1-8b-instruct), <br> Yes, <br> No | [NvidiaAISettings](../../../semantic_kernel/connectors/ai/nvidia/settings/nvidia_settings.py) |
+| | [NvidiaTextEmbedding](../../../semantic_kernel/connectors/ai/nvidia/services/nvidia_text_embedding.py) | ai_model_id, <br> api_key, <br> base_url | NVIDIA_TEXT_EMBEDDING_MODEL_ID, <br> NVIDIA_API_KEY, <br> NVIDIA_BASE_URL | Yes | [NvidiaAISettings](../../../semantic_kernel/connectors/ai/nvidia/settings/nvidia_settings.py) |
 | Mistral AI | [MistralAIChatCompletion](../../../semantic_kernel/connectors/ai/mistral_ai/services/mistral_ai_chat_completion.py) | ai_model_id, <br> api_key | MISTRALAI_CHAT_MODEL_ID, <br> MISTRALAI_API_KEY | Yes, <br> Yes | [MistralAISettings](../../../semantic_kernel/connectors/ai/mistral_ai/settings/mistral_ai_settings.py) |
 | | [MistralAITextEmbedding](../../../semantic_kernel/connectors/ai/mistral_ai/services/mistral_ai_text_embedding.py) | ai_model_id, <br> api_key | MISTRALAI_EMBEDDING_MODEL_ID, <br> MISTRALAI_API_KEY | Yes, <br> Yes | |
 | Ollama | [OllamaChatCompletion](../../../semantic_kernel/connectors/ai/ollama/services/ollama_chat_completion.py) | ai_model_id, <br> host | OLLAMA_CHAT_MODEL_ID, <br> OLLAMA_HOST | Yes, <br> No | [OllamaSettings](../../../semantic_kernel/connectors/ai/ollama/ollama_settings.py) |
diff --git a/python/samples/concepts/setup/chat_completion_services.py b/python/samples/concepts/setup/chat_completion_services.py
index 4baad81ea968..918e61c536db 100644
--- a/python/samples/concepts/setup/chat_completion_services.py
+++ b/python/samples/concepts/setup/chat_completion_services.py
@@ -28,6 +28,7 @@ class Services(str, Enum):
     ONNX = "onnx"
     VERTEX_AI = "vertex_ai"
     DEEPSEEK = "deepseek"
+    NVIDIA = "nvidia"
 
 
 service_id = "default"
@@ -64,6 +65,7 @@ def get_chat_completion_service_and_request_settings(
         Services.ONNX: lambda: get_onnx_chat_completion_service_and_request_settings(),
         Services.VERTEX_AI: lambda: get_vertex_ai_chat_completion_service_and_request_settings(),
         Services.DEEPSEEK: lambda: get_deepseek_chat_completion_service_and_request_settings(),
+        Services.NVIDIA: lambda: get_nvidia_chat_completion_service_and_request_settings(),
     }
 
     # Call the appropriate lambda or function based on the service name
@@ -414,3 +416,27 @@ def get_deepseek_chat_completion_service_and_request_settings() -> tuple[
     request_settings = OpenAIChatPromptExecutionSettings(service_id=service_id)
 
     return chat_service, request_settings
+
+
+def get_nvidia_chat_completion_service_and_request_settings() -> tuple[
+    "ChatCompletionClientBase", "PromptExecutionSettings"
+]:
+    """Return NVIDIA chat completion service and request settings.
+
+    The service credentials can be provided in one of three ways:
+    1. Via the constructor
+    2. Via environment variables
+    3. Via an environment (.env) file
+
+    The request settings control the behavior of the service. The default settings are sufficient to get started.
+    However, you can adjust the settings to suit your needs.
+    Note: Some of the settings are NOT meant to be set by the user.
+    Please refer to the Semantic Kernel Python documentation for more information:
+    https://learn.microsoft.com/en-us/python/api/semantic-kernel/semantic_kernel?view=semantic-kernel-python
+    """
+    from semantic_kernel.connectors.ai.nvidia import NvidiaChatCompletion, NvidiaChatPromptExecutionSettings
+
+    chat_service = NvidiaChatCompletion(service_id=service_id)
+    request_settings = NvidiaChatPromptExecutionSettings(service_id=service_id)
+
+    return chat_service, request_settings
diff --git a/python/semantic_kernel/connectors/ai/nvidia/README.md b/python/semantic_kernel/connectors/ai/nvidia/README.md
index 989446d06a05..0533f5aa4fa9 100644
--- a/python/semantic_kernel/connectors/ai/nvidia/README.md
+++ b/python/semantic_kernel/connectors/ai/nvidia/README.md
@@ -1,6 +1,6 @@
 # semantic_kernel.connectors.ai.nvidia
 
-This connector enables integration with NVIDIA NIM API for text embeddings. It allows you to use NVIDIA's embedding models within the Semantic Kernel framework.
+This connector enables integration with the NVIDIA NIM API for text embeddings and chat completion. It allows you to use NVIDIA's models within the Semantic Kernel framework.
 
 ## Quick start
 
@@ -13,6 +13,8 @@ kernel = sk.Kernel()
 ### Add NVIDIA text embedding service
 You can provide your API key directly or through environment variables
 ```python
+from semantic_kernel.connectors.ai.nvidia import NvidiaTextEmbedding
+
 embedding_service = NvidiaTextEmbedding(
     ai_model_id="nvidia/nv-embedqa-e5-v5",  # Default model if not specified
     api_key="your-nvidia-api-key",  # Can also use NVIDIA_API_KEY env variable
@@ -30,3 +32,35 @@ kernel.add_service(embedding_service)
 texts = ["Hello, world!", "Semantic Kernel is awesome"]
 embeddings = await kernel.get_service("nvidia-embeddings").generate_embeddings(texts)
 ```
+
+### Add NVIDIA chat completion service
+```python
+from semantic_kernel.connectors.ai.nvidia import NvidiaChatCompletion
+
+chat_service = NvidiaChatCompletion(
+    ai_model_id="meta/llama-3.1-8b-instruct",  # Default model if not specified
+    api_key="your-nvidia-api-key",  # Can also use NVIDIA_API_KEY env variable
+    service_id="nvidia-chat"  # Optional service identifier
+)
+kernel.add_service(chat_service)
+```
+
+### Basic chat completion
+```python
+response = await kernel.invoke_prompt("Hello, how are you?")
+```
+
+### Using with Chat Completion Agent
+```python
+from semantic_kernel.agents import ChatCompletionAgent
+from semantic_kernel.connectors.ai.nvidia import NvidiaChatCompletion
+
+agent = ChatCompletionAgent(
+    service=NvidiaChatCompletion(),
+    name="SK-Assistant",
+    instructions="You are a helpful assistant.",
+)
+response = await agent.get_response(messages="Write a haiku about Semantic Kernel.")
+print(response.content)
+```
+
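The README additions above cover the blocking call paths. For completeness, here is a hedged sketch of the streaming path that the new service also implements (via `_inner_get_streaming_chat_message_contents` later in this diff), using the public streaming API inherited from `ChatCompletionClientBase`; it assumes `NVIDIA_API_KEY` is set in the environment.

```python
# Sketch only (not part of this diff): streaming chat with the new connector,
# assuming NVIDIA_API_KEY is set in the environment.
import asyncio

from semantic_kernel.connectors.ai.nvidia import (
    NvidiaChatCompletion,
    NvidiaChatPromptExecutionSettings,
)
from semantic_kernel.contents.chat_history import ChatHistory


async def main() -> None:
    service = NvidiaChatCompletion()  # falls back to meta/llama-3.1-8b-instruct
    history = ChatHistory()
    history.add_user_message("Stream a one-line greeting.")

    # Inherited from ChatCompletionClientBase; each yielded item is a list of
    # StreamingChatMessageContent chunks (one per choice).
    async for chunks in service.get_streaming_chat_message_contents(
        history, NvidiaChatPromptExecutionSettings()
    ):
        for chunk in chunks:
            print(str(chunk), end="")
    print()


asyncio.run(main())
```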
diff --git a/python/semantic_kernel/connectors/ai/nvidia/__init__.py b/python/semantic_kernel/connectors/ai/nvidia/__init__.py
index 7a2a6679996d..edaf3fbcd59c 100644
--- a/python/semantic_kernel/connectors/ai/nvidia/__init__.py
+++ b/python/semantic_kernel/connectors/ai/nvidia/__init__.py
@@ -1,13 +1,17 @@
 # Copyright (c) Microsoft. All rights reserved.
 
 from semantic_kernel.connectors.ai.nvidia.prompt_execution_settings.nvidia_prompt_execution_settings import (
+    NvidiaChatPromptExecutionSettings,
     NvidiaEmbeddingPromptExecutionSettings,
     NvidiaPromptExecutionSettings,
 )
+from semantic_kernel.connectors.ai.nvidia.services.nvidia_chat_completion import NvidiaChatCompletion
 from semantic_kernel.connectors.ai.nvidia.services.nvidia_text_embedding import NvidiaTextEmbedding
 from semantic_kernel.connectors.ai.nvidia.settings.nvidia_settings import NvidiaSettings
 
 __all__ = [
+    "NvidiaChatCompletion",
+    "NvidiaChatPromptExecutionSettings",
     "NvidiaEmbeddingPromptExecutionSettings",
     "NvidiaPromptExecutionSettings",
     "NvidiaSettings",
diff --git a/python/semantic_kernel/connectors/ai/nvidia/prompt_execution_settings/nvidia_prompt_execution_settings.py b/python/semantic_kernel/connectors/ai/nvidia/prompt_execution_settings/nvidia_prompt_execution_settings.py
index 464db5aa1f3b..85f2d49dce05 100644
--- a/python/semantic_kernel/connectors/ai/nvidia/prompt_execution_settings/nvidia_prompt_execution_settings.py
+++ b/python/semantic_kernel/connectors/ai/nvidia/prompt_execution_settings/nvidia_prompt_execution_settings.py
@@ -2,7 +2,7 @@
 
 from typing import Annotated, Any, Literal
 
-from pydantic import Field
+from pydantic import BaseModel, Field
 
 from semantic_kernel.connectors.ai.prompt_execution_settings import PromptExecutionSettings
 
@@ -13,18 +13,6 @@ class NvidiaPromptExecutionSettings(PromptExecutionSettings):
     format: Literal["json"] | None = None
     options: dict[str, Any] | None = None
 
-    def prepare_settings_dict(self, **kwargs) -> dict[str, Any]:
-        """Prepare the settings as a dictionary for sending to the AI service.
-
-        By default, this method excludes the service_id and extension_data fields.
-        As well as any fields that are None.
-        """
-        return self.model_dump(
-            exclude={"service_id", "extension_data", "structured_json_response", "input_type", "truncate"},
-            exclude_none=True,
-            by_alias=True,
-        )
-
 
 class NvidiaEmbeddingPromptExecutionSettings(NvidiaPromptExecutionSettings):
     """Settings for NVIDIA embedding prompt execution."""
@@ -39,3 +27,47 @@ class NvidiaEmbeddingPromptExecutionSettings(NvidiaPromptExecutionSettings):
     extra_body: dict | None = None
     timeout: float | None = None
     dimensions: Annotated[int | None, Field(gt=0)] = None
+
+    def prepare_settings_dict(self, **kwargs) -> dict[str, Any]:
+        """Override only for embeddings to exclude input_type and truncate."""
+        return self.model_dump(
+            exclude={"service_id", "extension_data", "structured_json_response", "input_type", "truncate"},
+            exclude_none=True,
+            by_alias=True,
+        )
+
+
+class NvidiaChatPromptExecutionSettings(NvidiaPromptExecutionSettings):
+    """Settings for NVIDIA chat prompt execution."""
+
+    messages: list[dict[str, str]] | None = None
+    ai_model_id: Annotated[str | None, Field(serialization_alias="model")] = None
+    temperature: float | None = None
+    top_p: float | None = None
+    n: int | None = None
+    stream: bool = False
+    stop: str | list[str] | None = None
+    max_tokens: int | None = None
+    presence_penalty: float | None = None
+    frequency_penalty: float | None = None
+    logit_bias: dict[str, float] | None = None
+    user: str | None = None
+    tools: list[dict[str, Any]] | None = None
+    tool_choice: str | dict[str, Any] | None = None
+    response_format: (
+        dict[Literal["type"], Literal["text", "json_object"]] | dict[str, Any] | type[BaseModel] | type | None
+    ) = None
+    seed: int | None = None
+    extra_headers: dict | None = None
+    extra_body: dict | None = None
+    timeout: float | None = None
+    # NVIDIA-specific structured output support
+    nvext: dict[str, Any] | None = None
+
+    def prepare_settings_dict(self, **kwargs) -> dict[str, Any]:
+        """Override for chat to exclude response_format (routed through nvext) and internal fields."""
+        return self.model_dump(
+            exclude={"service_id", "extension_data", "structured_json_response", "response_format"},
+            exclude_none=True,
+            by_alias=True,
+        )
diff --git a/python/semantic_kernel/connectors/ai/nvidia/services/nvidia_chat_completion.py b/python/semantic_kernel/connectors/ai/nvidia/services/nvidia_chat_completion.py
new file mode 100644
index 000000000000..fdd2ae0dce0b
--- /dev/null
+++ b/python/semantic_kernel/connectors/ai/nvidia/services/nvidia_chat_completion.py
@@ -0,0 +1,313 @@
+# Copyright (c) Microsoft. All rights reserved.
+
+import logging
+import sys
+from collections.abc import AsyncGenerator
+from typing import Any, Literal
+
+from openai import AsyncOpenAI
+from openai.types.chat.chat_completion import ChatCompletion, Choice
+from openai.types.chat.chat_completion_chunk import ChatCompletionChunk
+from openai.types.chat.chat_completion_chunk import Choice as ChunkChoice
+from pydantic import ValidationError
+
+from semantic_kernel.connectors.ai.chat_completion_client_base import ChatCompletionClientBase
+from semantic_kernel.connectors.ai.completion_usage import CompletionUsage
+from semantic_kernel.connectors.ai.nvidia.prompt_execution_settings.nvidia_prompt_execution_settings import (
+    NvidiaChatPromptExecutionSettings,
+)
+from semantic_kernel.connectors.ai.nvidia.services.nvidia_handler import NvidiaHandler
+from semantic_kernel.connectors.ai.nvidia.services.nvidia_model_types import NvidiaModelTypes
+from semantic_kernel.connectors.ai.nvidia.settings.nvidia_settings import NvidiaSettings
+from semantic_kernel.connectors.ai.prompt_execution_settings import PromptExecutionSettings
+from semantic_kernel.contents import (
+    AuthorRole,
+    ChatMessageContent,
+    FinishReason,
+    FunctionCallContent,
+    StreamingChatMessageContent,
+    StreamingTextContent,
+    TextContent,
+)
+from semantic_kernel.contents.chat_history import ChatHistory
+from semantic_kernel.exceptions.service_exceptions import ServiceInitializationError
+from semantic_kernel.utils.feature_stage_decorator import experimental
+from semantic_kernel.utils.telemetry.model_diagnostics.decorators import (
+    trace_chat_completion,
+    trace_streaming_chat_completion,
+)
+
+if sys.version_info >= (3, 12):
+    from typing import override  # pragma: no cover
+else:
+    from typing_extensions import override  # pragma: no cover
+
+logger: logging.Logger = logging.getLogger(__name__)
+
+# Default NVIDIA chat model when none is specified
+DEFAULT_NVIDIA_CHAT_MODEL = "meta/llama-3.1-8b-instruct"
+
+
+@experimental
+class NvidiaChatCompletion(NvidiaHandler, ChatCompletionClientBase):
+    """NVIDIA Chat completion class.
+
+    This class does not support function calling. The SUPPORTS_FUNCTION_CALLING attribute
+    is set to False (inherited from the base class).
+    """
+
+    def __init__(
+        self,
+        ai_model_id: str | None = None,
+        api_key: str | None = None,
+        base_url: str | None = None,
+        service_id: str | None = None,
+        client: AsyncOpenAI | None = None,
+        env_file_path: str | None = None,
+        env_file_encoding: str | None = None,
+        instruction_role: Literal["system", "user", "assistant", "developer"] | None = None,
+    ) -> None:
+        """Initialize an NvidiaChatCompletion service.
+
+        Args:
+            ai_model_id (str): NVIDIA model name, see
+                https://docs.api.nvidia.com/nim/reference/
+                If not provided, defaults to DEFAULT_NVIDIA_CHAT_MODEL.
+            service_id (str | None): Service ID tied to the execution settings.
+            api_key (str | None): The optional API key to use. If provided, will override
+                the env vars or .env file value.
+            base_url (str | None): Custom API endpoint. (Optional)
+            client (Optional[AsyncOpenAI]): An existing client to use. (Optional)
+            env_file_path (str | None): Use the environment settings file as a fallback
+                to environment variables. (Optional)
+            env_file_encoding (str | None): The encoding of the environment settings file. (Optional)
+            instruction_role (Literal["system", "user", "assistant", "developer"] | None): The role to use for
+                'instruction' messages. Defaults to "system". (Optional)
+        """
+        try:
+            nvidia_settings = NvidiaSettings(
+                api_key=api_key,
+                base_url=base_url,
+                chat_model_id=ai_model_id,
+                env_file_path=env_file_path,
+                env_file_encoding=env_file_encoding,
+            )
+        except ValidationError as ex:
+            raise ServiceInitializationError("Failed to create NVIDIA settings.", ex) from ex
+
+        if not client and not nvidia_settings.api_key:
+            raise ServiceInitializationError("The NVIDIA API key is required.")
+        if not nvidia_settings.chat_model_id:
+            # Default fallback model
+            nvidia_settings.chat_model_id = DEFAULT_NVIDIA_CHAT_MODEL
+            logger.warning(f"Default chat model set as: {nvidia_settings.chat_model_id}")
+
+        # Create client if not provided
+        if not client:
+            client = AsyncOpenAI(
+                api_key=nvidia_settings.api_key.get_secret_value() if nvidia_settings.api_key else None,
+                base_url=nvidia_settings.base_url,
+            )
+
+        super().__init__(
+            ai_model_id=nvidia_settings.chat_model_id,
+            api_key=nvidia_settings.api_key.get_secret_value() if nvidia_settings.api_key else None,
+            base_url=nvidia_settings.base_url,
+            service_id=service_id or "",
+            ai_model_type=NvidiaModelTypes.CHAT,
+            client=client,
+            instruction_role=instruction_role or "system",
+        )
+
+    @classmethod
+    def from_dict(cls: type["NvidiaChatCompletion"], settings: dict[str, Any]) -> "NvidiaChatCompletion":
+        """Initialize an NVIDIA service from a dictionary of settings.
+
+        Args:
+            settings: A dictionary of settings for the service.
+        """
+        return cls(
+            ai_model_id=settings.get("ai_model_id"),
+            api_key=settings.get("api_key"),
+            base_url=settings.get("base_url"),
+            service_id=settings.get("service_id"),
+            env_file_path=settings.get("env_file_path"),
+        )
+
+    @override
+    def get_prompt_execution_settings_class(self) -> type["PromptExecutionSettings"]:
+        return NvidiaChatPromptExecutionSettings
+
+    @override
+    @trace_chat_completion("nvidia")
+    async def _inner_get_chat_message_contents(
+        self,
+        chat_history: "ChatHistory",
+        settings: "PromptExecutionSettings",
+    ) -> list["ChatMessageContent"]:
+        if not isinstance(settings, NvidiaChatPromptExecutionSettings):
+            settings = self.get_prompt_execution_settings_from_settings(settings)
+        assert isinstance(settings, NvidiaChatPromptExecutionSettings)  # nosec
+
+        settings.stream = False
+        settings.messages = self._prepare_chat_history_for_request(chat_history)
+        settings.ai_model_id = settings.ai_model_id or self.ai_model_id
+
+        # Handle structured output
+        self._handle_structured_output(settings)
+
+        response = await self._send_request(settings)
+        assert isinstance(response, ChatCompletion)  # nosec
+        response_metadata = self._get_metadata_from_chat_response(response)
+        return [self._create_chat_message_content(response, choice, response_metadata) for choice in response.choices]
+
+    @override
+    @trace_streaming_chat_completion("nvidia")
+    async def _inner_get_streaming_chat_message_contents(
+        self,
+        chat_history: "ChatHistory",
+        settings: "PromptExecutionSettings",
+        function_invoke_attempt: int = 0,
+    ) -> AsyncGenerator[list["StreamingChatMessageContent"], Any]:
+        if not isinstance(settings, NvidiaChatPromptExecutionSettings):
+            settings = self.get_prompt_execution_settings_from_settings(settings)
+        assert isinstance(settings, NvidiaChatPromptExecutionSettings)  # nosec
+
+        settings.stream = True
+        settings.messages = self._prepare_chat_history_for_request(chat_history)
+        settings.ai_model_id = settings.ai_model_id or self.ai_model_id
+
+        # Handle structured output
+        self._handle_structured_output(settings)
+
+        response = await self._send_request(settings)
+        assert isinstance(response, AsyncGenerator)  # nosec
+
+        async for chunk in response:
+            if len(chunk.choices) == 0:
+                continue
+            chunk_metadata = self._get_metadata_from_chat_response(chunk)
+            yield [
+                self._create_streaming_chat_message_content(chunk, choice, chunk_metadata, function_invoke_attempt)
+                for choice in chunk.choices
+            ]
+
+    def _create_chat_message_content(
+        self, response: ChatCompletion, choice: Choice, response_metadata: dict[str, Any]
+    ) -> "ChatMessageContent":
+        """Create a chat message content object from a choice."""
+        metadata = self._get_metadata_from_chat_choice(choice)
+        metadata.update(response_metadata)
+
+        items: list[Any] = self._get_tool_calls_from_chat_choice(choice)
+        items.extend(self._get_function_call_from_chat_choice(choice))
+        if choice.message.content:
+            items.append(TextContent(text=choice.message.content))
+
+        return ChatMessageContent(
+            inner_content=response,
+            ai_model_id=self.ai_model_id,
+            metadata=metadata,
+            role=AuthorRole(choice.message.role),
+            items=items,
+            finish_reason=(FinishReason(choice.finish_reason) if choice.finish_reason else None),
+        )
+
+    def _create_streaming_chat_message_content(
+        self,
+        chunk: ChatCompletionChunk,
+        choice: ChunkChoice,
+        chunk_metadata: dict[str, Any],
+        function_invoke_attempt: int,
+    ) -> StreamingChatMessageContent:
+        """Create a streaming chat message content object from a choice."""
+        metadata = self._get_metadata_from_chat_choice(choice)
+        metadata.update(chunk_metadata)
+
+        items: list[Any] = self._get_tool_calls_from_chat_choice(choice)
+        items.extend(self._get_function_call_from_chat_choice(choice))
+        if choice.delta and choice.delta.content is not None:
+            items.append(StreamingTextContent(choice_index=choice.index, text=choice.delta.content))
+        return StreamingChatMessageContent(
+            choice_index=choice.index,
+            inner_content=chunk,
+            ai_model_id=self.ai_model_id,
+            metadata=metadata,
+            role=(AuthorRole(choice.delta.role) if choice.delta and choice.delta.role else AuthorRole.ASSISTANT),
+            finish_reason=(FinishReason(choice.finish_reason) if choice.finish_reason else None),
+            items=items,
+            function_invoke_attempt=function_invoke_attempt,
+        )
+
+    def _get_metadata_from_chat_response(self, response: ChatCompletion | ChatCompletionChunk) -> dict[str, Any]:
+        """Get metadata from a chat response."""
+        return {
+            "id": response.id,
+            "created": response.created,
+            "system_fingerprint": getattr(response, "system_fingerprint", None),
+            "usage": CompletionUsage.from_openai(response.usage) if response.usage is not None else None,
+        }
+
+    def _get_metadata_from_chat_choice(self, choice: Choice | ChunkChoice) -> dict[str, Any]:
+        """Get metadata from a chat choice."""
+        return {
+            "logprobs": getattr(choice, "logprobs", None),
+        }
+
+    def _get_tool_calls_from_chat_choice(self, choice: Choice | ChunkChoice) -> list[FunctionCallContent]:
+        """Get tool calls from a chat choice."""
+        content = choice.message if isinstance(choice, Choice) else choice.delta
+        if content and (tool_calls := getattr(content, "tool_calls", None)) is not None:
+            return [
+                FunctionCallContent(
+                    id=tool.id,
+                    index=getattr(tool, "index", None),
+                    name=tool.function.name,
+                    arguments=tool.function.arguments,
+                )
+                for tool in tool_calls
+            ]
+        return []
+
+    def _get_function_call_from_chat_choice(self, choice: Choice | ChunkChoice) -> list[FunctionCallContent]:
+        """Get function calls from a chat choice."""
+        content = choice.message if isinstance(choice, Choice) else choice.delta
+        if content and (function_call := getattr(content, "function_call", None)) is not None:
+            return [
+                FunctionCallContent(
+                    id="",
+                    name=function_call.name,
+                    arguments=function_call.arguments,
+                )
+            ]
+        return []
+
+    def _handle_structured_output(self, request_settings: NvidiaChatPromptExecutionSettings) -> None:
+        """Handle structured output for NVIDIA models using nvext parameter."""
+        response_format = getattr(request_settings, "response_format", None)
+        if response_format:
+            # Convert Pydantic model to JSON schema for NVIDIA's guided_json
+            if hasattr(response_format, "model_json_schema"):
+                # It's a Pydantic model
+                schema = response_format.model_json_schema()
+                if not request_settings.extra_body:
+                    request_settings.extra_body = {}
+                request_settings.extra_body["nvext"] = {"guided_json": schema}
+            elif isinstance(response_format, dict):
+                # It's already a dict, use it directly
+                if not request_settings.extra_body:
+                    request_settings.extra_body = {}
+                request_settings.extra_body["nvext"] = {"guided_json": response_format}
+
+    def _prepare_chat_history_for_request(
+        self,
+        chat_history: ChatHistory,
+        role_key: str = "role",
+        content_key: str = "content",
+    ) -> list[dict[str, str]]:
+        """Prepare chat history for request."""
+        messages = []
+        for message in chat_history.messages:
+            message_dict = {role_key: message.role.value, content_key: message.content}
+            messages.append(message_dict)
+        return messages
diff --git a/python/semantic_kernel/connectors/ai/nvidia/services/nvidia_handler.py b/python/semantic_kernel/connectors/ai/nvidia/services/nvidia_handler.py
index 4aed491c9e52..0f7efff01703 100644
--- a/python/semantic_kernel/connectors/ai/nvidia/services/nvidia_handler.py
+++ b/python/semantic_kernel/connectors/ai/nvidia/services/nvidia_handler.py
@@ -5,10 +5,14 @@
 from typing import Any, ClassVar, Union
 
 from openai import AsyncOpenAI, AsyncStream
-from openai.types import CreateEmbeddingResponse
+from openai.types.chat.chat_completion import ChatCompletion
+from openai.types.chat.chat_completion_chunk import ChatCompletionChunk
+from openai.types.completion import Completion
+from openai.types.create_embedding_response import CreateEmbeddingResponse
 
-from semantic_kernel.connectors.ai.nvidia import (
-    NvidiaPromptExecutionSettings,
+from semantic_kernel.connectors.ai.nvidia.prompt_execution_settings.nvidia_prompt_execution_settings import (
+    NvidiaChatPromptExecutionSettings,
+    NvidiaEmbeddingPromptExecutionSettings,
 )
 from semantic_kernel.connectors.ai.nvidia.services.nvidia_model_types import NvidiaModelTypes
 from semantic_kernel.connectors.ai.prompt_execution_settings import PromptExecutionSettings
@@ -18,7 +22,7 @@
 
 logger: logging.Logger = logging.getLogger(__name__)
 
-RESPONSE_TYPE = Union[list[Any],]
+RESPONSE_TYPE = Union[list[Any], ChatCompletion, Completion, AsyncStream[Any]]
 
 
 class NvidiaHandler(KernelBaseModel, ABC):
@@ -26,22 +30,23 @@ class NvidiaHandler(KernelBaseModel, ABC):
     MODEL_PROVIDER_NAME: ClassVar[str] = "nvidia"
 
     client: AsyncOpenAI
-    ai_model_type: NvidiaModelTypes = (
-        NvidiaModelTypes.EMBEDDING
-    )  # TODO: revert this to chat after adding support for chat-compl  # noqa: TD002
-    prompt_tokens: int = 0
+    ai_model_type: NvidiaModelTypes = NvidiaModelTypes.CHAT
     completion_tokens: int = 0
     total_tokens: int = 0
+    prompt_tokens: int = 0
 
     async def _send_request(self, settings: PromptExecutionSettings) -> RESPONSE_TYPE:
        """Send a request to the Nvidia API."""
         if self.ai_model_type == NvidiaModelTypes.EMBEDDING:
-            assert isinstance(settings, NvidiaPromptExecutionSettings)  # nosec
+            assert isinstance(settings, NvidiaEmbeddingPromptExecutionSettings)  # nosec
             return await self._send_embedding_request(settings)
+        if self.ai_model_type == NvidiaModelTypes.CHAT:
+            assert isinstance(settings, NvidiaChatPromptExecutionSettings)  # nosec
+            return await self._send_chat_completion_request(settings)
 
         raise NotImplementedError(f"Model type {self.ai_model_type} is not supported")
 
-    async def _send_embedding_request(self, settings: NvidiaPromptExecutionSettings) -> list[Any]:
+    async def _send_embedding_request(self, settings: NvidiaEmbeddingPromptExecutionSettings) -> list[Any]:
         """Send a request to the OpenAI embeddings endpoint."""
         try:
             # unsupported parameters are internally excluded from main dict and added to extra_body
@@ -55,9 +60,35 @@ async def _send_embedding_request(self, settings: NvidiaPromptExecutionSettings
                 ex,
             ) from ex
 
+    async def _send_chat_completion_request(
+        self, settings: NvidiaChatPromptExecutionSettings
+    ) -> ChatCompletion | AsyncStream[Any]:
+        """Send a request to the NVIDIA chat completion endpoint."""
+        try:
+            settings_dict = settings.prepare_settings_dict()
+
+            # Handle structured output if nvext is present in extra_body
+            if settings.extra_body and "nvext" in settings.extra_body:
+                if "extra_body" not in settings_dict:
+                    settings_dict["extra_body"] = {}
+                settings_dict["extra_body"]["nvext"] = settings.extra_body["nvext"]
+
+            response = await self.client.chat.completions.create(**settings_dict)
+            self.store_usage(response)
+            return response
+        except Exception as ex:
+            raise ServiceResponseException(
+                f"{type(self)} service failed to complete the chat",
+                ex,
+            ) from ex
+
     def store_usage(
         self,
-        response: CreateEmbeddingResponse,
+        response: ChatCompletion
+        | Completion
+        | AsyncStream[ChatCompletionChunk]
+        | AsyncStream[Completion]
+        | CreateEmbeddingResponse,
     ):
         """Store the usage information from the response."""
         if not isinstance(response, AsyncStream) and response.usage:
diff --git a/python/semantic_kernel/connectors/ai/nvidia/services/nvidia_model_types.py b/python/semantic_kernel/connectors/ai/nvidia/services/nvidia_model_types.py
index 4e3e12c6b71b..e9222f36a6dd 100644
--- a/python/semantic_kernel/connectors/ai/nvidia/services/nvidia_model_types.py
+++ b/python/semantic_kernel/connectors/ai/nvidia/services/nvidia_model_types.py
@@ -4,6 +4,7 @@
 
 
 class NvidiaModelTypes(Enum):
-    """Nvidia model types, can be text, chat or embedding."""
+    """Nvidia model types, can be text, chat, or embedding."""
 
     EMBEDDING = "embedding"
+    CHAT = "chat"
diff --git a/python/semantic_kernel/connectors/ai/nvidia/services/nvidia_text_embedding.py b/python/semantic_kernel/connectors/ai/nvidia/services/nvidia_text_embedding.py
index 1d828963d896..aa0da4d51859 100644
--- a/python/semantic_kernel/connectors/ai/nvidia/services/nvidia_text_embedding.py
+++ b/python/semantic_kernel/connectors/ai/nvidia/services/nvidia_text_embedding.py
@@ -15,7 +15,7 @@
 from openai import AsyncOpenAI
 from pydantic import ValidationError
 
-from semantic_kernel.connectors.ai.embeddings.embedding_generator_base import EmbeddingGeneratorBase
+from semantic_kernel.connectors.ai.embedding_generator_base import EmbeddingGeneratorBase
 from semantic_kernel.connectors.ai.nvidia.prompt_execution_settings.nvidia_prompt_execution_settings import (
     NvidiaEmbeddingPromptExecutionSettings,
 )
@@ -73,7 +73,10 @@ def __init__(
         if not nvidia_settings.api_key:
             logger.warning("API_KEY is missing, inference may fail.")
         if not client:
-            client = AsyncOpenAI(api_key=nvidia_settings.api_key.get_secret_value(), base_url=nvidia_settings.base_url)
+            client = AsyncOpenAI(
+                api_key=nvidia_settings.api_key.get_secret_value() if nvidia_settings.api_key else None,
+                base_url=nvidia_settings.base_url,
+            )
         super().__init__(
             ai_model_id=nvidia_settings.embedding_model_id,
             api_key=nvidia_settings.api_key.get_secret_value() if nvidia_settings.api_key else None,
diff --git a/python/semantic_kernel/connectors/ai/nvidia/settings/nvidia_settings.py b/python/semantic_kernel/connectors/ai/nvidia/settings/nvidia_settings.py
index fb132df95ab1..0c7eb37641d3 100644
--- a/python/semantic_kernel/connectors/ai/nvidia/settings/nvidia_settings.py
+++ b/python/semantic_kernel/connectors/ai/nvidia/settings/nvidia_settings.py
@@ -24,11 +24,14 @@ class NvidiaSettings(KernelBaseSettings):
         (Env var NVIDIA_BASE_URL)
     - embedding_model_id: str | None - The NVIDIA embedding model ID to use, for example, nvidia/nv-embed-v1.
         (Env var NVIDIA_EMBEDDING_MODEL_ID)
+    - chat_model_id: str | None - The NVIDIA chat model ID to use.
+        (Env var NVIDIA_CHAT_MODEL_ID)
     - env_file_path: if provided, the .env settings are read from this file path location
     """
 
     env_prefix: ClassVar[str] = "NVIDIA_"
 
-    api_key: SecretStr
+    api_key: SecretStr | None = None
     base_url: str = "https://integrate.api.nvidia.com/v1"
-    embedding_model_id: str | None
+    embedding_model_id: str | None = None
+    chat_model_id: str | None = None
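With `api_key` now optional and `chat_model_id` added, `NvidiaSettings` can be resolved entirely from the environment (prefix `NVIDIA_`), which is what the unit tests below exercise. A small sketch with placeholder values:

```python
# Sketch: resolving NvidiaSettings from environment variables (placeholder values).
import os

from semantic_kernel.connectors.ai.nvidia.settings.nvidia_settings import NvidiaSettings

os.environ["NVIDIA_API_KEY"] = "nvapi-example"
os.environ["NVIDIA_CHAT_MODEL_ID"] = "meta/llama-3.1-8b-instruct"

settings = NvidiaSettings()
assert settings.api_key is not None  # SecretStr; repr does not leak the value
print(settings.api_key.get_secret_value())  # "nvapi-example"
print(settings.chat_model_id)  # "meta/llama-3.1-8b-instruct"
print(settings.base_url)  # default: "https://integrate.api.nvidia.com/v1"
```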
diff --git a/python/tests/unit/connectors/ai/nvidia/prompt_execution_settings/test_nvidia_prompt_execution_settings.py b/python/tests/unit/connectors/ai/nvidia/prompt_execution_settings/test_nvidia_prompt_execution_settings.py
new file mode 100644
index 000000000000..8baf734bc3a5
--- /dev/null
+++ b/python/tests/unit/connectors/ai/nvidia/prompt_execution_settings/test_nvidia_prompt_execution_settings.py
@@ -0,0 +1,101 @@
+# Copyright (c) Microsoft. All rights reserved.
+
+import pytest
+from pydantic import BaseModel, ValidationError
+
+from semantic_kernel.connectors.ai.nvidia.prompt_execution_settings.nvidia_prompt_execution_settings import (
+    NvidiaChatPromptExecutionSettings,
+    NvidiaEmbeddingPromptExecutionSettings,
+    NvidiaPromptExecutionSettings,
+)
+
+
+class TestNvidiaPromptExecutionSettings:
+    """Test cases for NvidiaPromptExecutionSettings."""
+
+    def test_init_with_defaults(self):
+        """Test initialization with default values."""
+        settings = NvidiaPromptExecutionSettings()
+        assert settings.format is None
+        assert settings.options is None
+
+    def test_init_with_values(self):
+        """Test initialization with specific values."""
+        settings = NvidiaPromptExecutionSettings(
+            format="json",
+            options={"key": "value"},
+        )
+        assert settings.format == "json"
+        assert settings.options == {"key": "value"}
+
+    def test_validation_format_values(self):
+        """Test format validation values."""
+        # Valid values
+        settings = NvidiaPromptExecutionSettings(format="json")
+        assert settings.format == "json"
+
+
+class TestNvidiaChatPromptExecutionSettings:
+    """Test cases for NvidiaChatPromptExecutionSettings."""
+
+    def test_init_with_defaults(self):
+        """Test initialization with default values."""
+        settings = NvidiaChatPromptExecutionSettings()
+        assert settings.messages is None
+        assert settings.response_format is None
+
+    def test_response_format_with_pydantic_model(self):
+        """Test response_format with Pydantic model."""
+
+        class TestModel(BaseModel):
+            name: str
+            value: int
+
+        settings = NvidiaChatPromptExecutionSettings(response_format=TestModel)
+
+        assert settings.response_format == TestModel
+
+    def test_response_format_with_dict(self):
+        """Test response_format with dictionary."""
+        settings = NvidiaChatPromptExecutionSettings(response_format={"type": "json_object"})
+
+        assert settings.response_format == {"type": "json_object"}
+
+
+class TestNvidiaEmbeddingPromptExecutionSettings:
+    """Test cases for NvidiaEmbeddingPromptExecutionSettings."""
+
+    def test_init_with_defaults(self):
+        """Test initialization with default values."""
+        settings = NvidiaEmbeddingPromptExecutionSettings()
+        assert settings.input is None
+        assert settings.encoding_format == "float"
+        assert settings.input_type == "query"
+        assert settings.truncate == "NONE"
+
+    def test_init_with_values(self):
+        """Test initialization with specific values."""
+        settings = NvidiaEmbeddingPromptExecutionSettings(
+            input=["hello", "world"],
+            encoding_format="base64",
+            input_type="passage",
+            truncate="START",
+        )
+
+        assert settings.input == ["hello", "world"]
+        assert settings.encoding_format == "base64"
+        assert settings.input_type == "passage"
+        assert settings.truncate == "START"
+
+    def test_validation_encoding_format(self):
+        """Test encoding_format validation."""
+        # Valid values
+        settings = NvidiaEmbeddingPromptExecutionSettings(encoding_format="float")
+        assert settings.encoding_format == "float"
+
+        settings = NvidiaEmbeddingPromptExecutionSettings(encoding_format="base64")
+        assert settings.encoding_format == "base64"
+
+        # Invalid values
+        with pytest.raises(ValidationError):
+            NvidiaEmbeddingPromptExecutionSettings(encoding_format="invalid")
diff --git a/python/tests/unit/connectors/ai/nvidia/services/test_nvidia_chat_completion.py b/python/tests/unit/connectors/ai/nvidia/services/test_nvidia_chat_completion.py
new file mode 100644
index 000000000000..a4008f9a1c05
--- /dev/null
+++ b/python/tests/unit/connectors/ai/nvidia/services/test_nvidia_chat_completion.py
@@ -0,0 +1,151 @@
+# Copyright (c) Microsoft. All rights reserved.
+
+from unittest.mock import AsyncMock, patch
+
+import pytest
+from openai.resources.chat.completions import AsyncCompletions
+from openai.types.chat import ChatCompletion, ChatCompletionMessage
+from openai.types.chat.chat_completion import Choice
+from openai.types.completion_usage import CompletionUsage
+from pydantic import BaseModel
+
+from semantic_kernel.connectors.ai.nvidia import NvidiaChatCompletion
+from semantic_kernel.connectors.ai.nvidia.prompt_execution_settings.nvidia_prompt_execution_settings import (
+    NvidiaChatPromptExecutionSettings,
+)
+from semantic_kernel.connectors.ai.nvidia.services.nvidia_chat_completion import DEFAULT_NVIDIA_CHAT_MODEL
+from semantic_kernel.contents import ChatHistory
+from semantic_kernel.exceptions import ServiceInitializationError, ServiceResponseException
+
+
+@pytest.fixture
+def nvidia_unit_test_env(monkeypatch, exclude_list, override_env_param_dict):
+    """Fixture to set environment variables for NvidiaChatCompletion."""
+    if exclude_list is None:
+        exclude_list = []
+
+    if override_env_param_dict is None:
+        override_env_param_dict = {}
+
+    env_vars = {"NVIDIA_API_KEY": "test_api_key", "NVIDIA_CHAT_MODEL_ID": "meta/llama-3.1-8b-instruct"}
+
+    env_vars.update(override_env_param_dict)
+
+    for key, value in env_vars.items():
+        if key not in exclude_list:
+            monkeypatch.setenv(key, value)
+        else:
+            monkeypatch.delenv(key, raising=False)
+
+    return env_vars
+
+
+def _create_mock_chat_completion(content: str = "Hello!") -> ChatCompletion:
+    """Helper function to create a mock ChatCompletion response."""
+    message = ChatCompletionMessage(role="assistant", content=content)
+    choice = Choice(
+        finish_reason="stop",
+        index=0,
+        message=message,
+    )
+    usage = CompletionUsage(completion_tokens=20, prompt_tokens=10, total_tokens=30)
+    return ChatCompletion(
+        id="test-id",
+        choices=[choice],
+        created=1234567890,
+        model="meta/llama-3.1-8b-instruct",
+        object="chat.completion",
+        usage=usage,
+    )
+
+
+class TestNvidiaChatCompletion:
+    """Test cases for NvidiaChatCompletion."""
+
+    def test_init_with_defaults(self, nvidia_unit_test_env):
+        """Test initialization with default values."""
+        service = NvidiaChatCompletion()
+        assert service.ai_model_id == nvidia_unit_test_env["NVIDIA_CHAT_MODEL_ID"]
+
+    def test_get_prompt_execution_settings_class(self, nvidia_unit_test_env):
+        """Test getting the prompt execution settings class."""
+        service = NvidiaChatCompletion()
+        from semantic_kernel.connectors.ai.nvidia.prompt_execution_settings.nvidia_prompt_execution_settings import (
+            NvidiaChatPromptExecutionSettings,
+        )
+
+        assert service.get_prompt_execution_settings_class() == NvidiaChatPromptExecutionSettings
+
+    @pytest.mark.parametrize("exclude_list", [["NVIDIA_API_KEY"]], indirect=True)
+    def test_init_with_empty_api_key(self, nvidia_unit_test_env):
+        """Test initialization fails with empty API key."""
+        with pytest.raises(ServiceInitializationError):
+            NvidiaChatCompletion()
+
+    @pytest.mark.parametrize("exclude_list", [["NVIDIA_CHAT_MODEL_ID"]], indirect=True)
+    def test_init_with_empty_model_id(self, nvidia_unit_test_env):
+        """Test initialization with empty model ID uses default."""
+        service = NvidiaChatCompletion()
+        assert service.ai_model_id == DEFAULT_NVIDIA_CHAT_MODEL
+
+    def test_init_with_custom_model_id(self, nvidia_unit_test_env):
+        """Test initialization with custom model ID."""
+        custom_model = "custom/nvidia-model"
+        service = NvidiaChatCompletion(ai_model_id=custom_model)
+
+        assert service.ai_model_id == custom_model
+
+    @pytest.mark.asyncio
+    @patch.object(AsyncCompletions, "create", new_callable=AsyncMock)
+    async def test_get_chat_message_contents(self, mock_create, nvidia_unit_test_env):
+        """Test basic chat completion."""
+        mock_create.return_value = _create_mock_chat_completion("Hello!")
+
+        service = NvidiaChatCompletion()
+        chat_history = ChatHistory()
+        chat_history.add_user_message("Hello")
+        settings = NvidiaChatPromptExecutionSettings()
+
+        result = await service.get_chat_message_contents(chat_history, settings)
+
+        assert len(result) == 1
+        assert result[0].content == "Hello!"
+
+    @pytest.mark.asyncio
+    @patch.object(AsyncCompletions, "create", new_callable=AsyncMock)
+    async def test_structured_output_with_pydantic_model(self, mock_create, nvidia_unit_test_env):
+        """Test structured output with Pydantic model."""
+
+        # Define test model
+        class TestModel(BaseModel):
+            name: str
+            value: int
+
+        mock_create.return_value = _create_mock_chat_completion('{"name": "test", "value": 42}')
+
+        service = NvidiaChatCompletion()
+        chat_history = ChatHistory()
+        chat_history.add_user_message("Give me structured data")
+        settings = NvidiaChatPromptExecutionSettings()
+        settings.response_format = TestModel
+
+        await service.get_chat_message_contents(chat_history, settings)
+
+        # Verify nvext was passed
+        call_args = mock_create.call_args[1]
+        assert "extra_body" in call_args
+        assert "nvext" in call_args["extra_body"]
+        assert "guided_json" in call_args["extra_body"]["nvext"]
+
+    @pytest.mark.asyncio
+    @patch.object(AsyncCompletions, "create", new_callable=AsyncMock)
+    async def test_error_handling(self, mock_create, nvidia_unit_test_env):
+        """Test error handling."""
+        mock_create.side_effect = Exception("API Error")
+
+        service = NvidiaChatCompletion()
+        chat_history = ChatHistory()
+        chat_history.add_user_message("Hello")
+        settings = NvidiaChatPromptExecutionSettings()
+
+        with pytest.raises(ServiceResponseException):
+            await service.get_chat_message_contents(chat_history, settings)
diff --git a/python/tests/unit/connectors/ai/nvidia/services/test_nvidia_handler.py b/python/tests/unit/connectors/ai/nvidia/services/test_nvidia_handler.py
new file mode 100644
index 000000000000..f6a0bfefd247
--- /dev/null
+++ b/python/tests/unit/connectors/ai/nvidia/services/test_nvidia_handler.py
@@ -0,0 +1,152 @@
+# Copyright (c) Microsoft. All rights reserved.
+
+from unittest.mock import AsyncMock, MagicMock
+
+import pytest
+from openai import AsyncOpenAI
+
+from semantic_kernel.connectors.ai.nvidia.prompt_execution_settings.nvidia_prompt_execution_settings import (
+    NvidiaChatPromptExecutionSettings,
+    NvidiaEmbeddingPromptExecutionSettings,
+)
+from semantic_kernel.connectors.ai.nvidia.services.nvidia_handler import NvidiaHandler
+from semantic_kernel.connectors.ai.nvidia.services.nvidia_model_types import NvidiaModelTypes
+
+
+@pytest.fixture
+def mock_openai_client():
+    """Create a mock OpenAI client."""
+    return AsyncMock(spec=AsyncOpenAI)
+
+
+@pytest.fixture
+def nvidia_handler(mock_openai_client):
+    """Create a NvidiaHandler instance with mocked client."""
+    return NvidiaHandler(
+        client=mock_openai_client,
+        ai_model_type=NvidiaModelTypes.CHAT,
+        ai_model_id="test-model",
+        api_key="test-key",
+    )
+
+
+class TestNvidiaHandler:
+    """Test cases for NvidiaHandler."""
+
+    def test_init(self, mock_openai_client):
+        """Test initialization."""
+        handler = NvidiaHandler(
+            client=mock_openai_client,
+            ai_model_type=NvidiaModelTypes.CHAT,
+        )
+
+        assert handler.client == mock_openai_client
+        assert handler.ai_model_type == NvidiaModelTypes.CHAT
+        assert handler.MODEL_PROVIDER_NAME == "nvidia"
+
+    @pytest.mark.asyncio
+    async def test_send_chat_completion_request(self, nvidia_handler, mock_openai_client):
+        """Test sending chat completion request."""
+        # Mock the response
+        mock_response = MagicMock()
+        mock_response.choices = [
+            MagicMock(
+                message=MagicMock(role="assistant", content="Hello!"),
+                finish_reason="stop",
+            )
+        ]
+        mock_response.usage = MagicMock(prompt_tokens=10, completion_tokens=20, total_tokens=30)
+        mock_openai_client.chat.completions.create = AsyncMock(return_value=mock_response)
+
+        # Create settings
+        settings = NvidiaChatPromptExecutionSettings(
+            messages=[{"role": "user", "content": "Hello"}],
+            model="test-model",
+        )
+
+        # Test the method
+        result = await nvidia_handler._send_chat_completion_request(settings)
+        assert result == mock_response
+
+        # Verify usage was stored
+        assert nvidia_handler.prompt_tokens == 10
+        assert nvidia_handler.completion_tokens == 20
+        assert nvidia_handler.total_tokens == 30
+
+    @pytest.mark.asyncio
+    async def test_send_chat_completion_request_with_nvext(self, nvidia_handler, mock_openai_client):
+        """Test sending chat completion request with nvext parameter."""
+        # Mock the response
+        mock_response = MagicMock()
+        mock_response.choices = [
+            MagicMock(
+                message=MagicMock(role="assistant", content='{"result": "success"}'),
+                finish_reason="stop",
+            )
+        ]
+        mock_response.usage = MagicMock(prompt_tokens=10, completion_tokens=20, total_tokens=30)
+        mock_openai_client.chat.completions.create = AsyncMock(return_value=mock_response)
+
+        # Create settings with nvext
+        settings = NvidiaChatPromptExecutionSettings(
+            messages=[{"role": "user", "content": "Give me JSON"}],
+            model="test-model",
+            extra_body={"nvext": {"guided_json": {"type": "object"}}},
+        )
+
+        # Test the method
+        result = await nvidia_handler._send_chat_completion_request(settings)
+        assert result == mock_response
+
+        # Verify the client was called with nvext in extra_body
+        call_args = mock_openai_client.chat.completions.create.call_args[1]
+        assert "extra_body" in call_args
+        assert "nvext" in call_args["extra_body"]
+        assert call_args["extra_body"]["nvext"] == {"guided_json": {"type": "object"}}
+
+    @pytest.mark.asyncio
+    async def test_send_embedding_request(self, mock_openai_client):
+        """Test sending embedding request."""
+        handler = NvidiaHandler(
+            client=mock_openai_client,
+            ai_model_type=NvidiaModelTypes.EMBEDDING,
+            ai_model_id="test-model",
+        )
+
+        # Mock the response
+        mock_response = MagicMock()
+        mock_response.data = [
+            MagicMock(embedding=[0.1, 0.2, 0.3]),
+            MagicMock(embedding=[0.4, 0.5, 0.6]),
+        ]
+        mock_response.usage = MagicMock(prompt_tokens=10, total_tokens=10)
+        mock_openai_client.embeddings.create = AsyncMock(return_value=mock_response)
+
+        # Create settings
+        settings = NvidiaEmbeddingPromptExecutionSettings(
+            input=["hello", "world"],
+            model="test-model",
+        )
+
+        # Test the method
+        result = await handler._send_embedding_request(settings)
+        assert result == [[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]]
+
+    @pytest.mark.asyncio
+    async def test_send_request_unsupported_model_type(self, mock_openai_client):
+        """Test send_request with unsupported model type."""
+        # Create a handler with invalid model type by bypassing validation
+        handler = NvidiaHandler(
+            client=mock_openai_client,
+            ai_model_type=NvidiaModelTypes.CHAT,
+        )
+        # Manually set the attribute to bypass Pydantic validation
+        object.__setattr__(handler, "ai_model_type", "UNSUPPORTED")
+
+        settings = NvidiaChatPromptExecutionSettings(
+            messages=[{"role": "user", "content": "Hello"}],
+            model="test-model",
+        )
+
+        with pytest.raises(NotImplementedError, match="Model type UNSUPPORTED is not supported"):
+            await handler._send_request(settings)
diff --git a/python/tests/unit/connectors/ai/nvidia/settings/test_nvidia_settings.py b/python/tests/unit/connectors/ai/nvidia/settings/test_nvidia_settings.py
new file mode 100644
index 000000000000..4ac818ad480d
--- /dev/null
+++ b/python/tests/unit/connectors/ai/nvidia/settings/test_nvidia_settings.py
@@ -0,0 +1,56 @@
+# Copyright (c) Microsoft. All rights reserved.
+
+
+from semantic_kernel.connectors.ai.nvidia.settings.nvidia_settings import NvidiaSettings
+
+
+class TestNvidiaSettings:
+    """Test cases for NvidiaSettings."""
+
+    def test_init_with_defaults(self):
+        """Test initialization with default values."""
+        settings = NvidiaSettings()
+        assert settings.api_key is None
+        assert settings.base_url == "https://integrate.api.nvidia.com/v1"
+        assert settings.embedding_model_id is None
+        assert settings.chat_model_id is None
+
+    def test_init_with_values(self):
+        """Test initialization with specific values."""
+        settings = NvidiaSettings(
+            api_key="test-api-key",
+            base_url="https://custom.nvidia.com/v1",
+            embedding_model_id="test-embedding-model",
+            chat_model_id="test-chat-model",
+        )
+
+        assert settings.api_key.get_secret_value() == "test-api-key"
+        assert settings.base_url == "https://custom.nvidia.com/v1"
+        assert settings.embedding_model_id == "test-embedding-model"
+        assert settings.chat_model_id == "test-chat-model"
+
+    def test_env_prefix(self):
+        """Test environment variable prefix."""
+        assert NvidiaSettings.env_prefix == "NVIDIA_"
+
+    def test_api_key_secret_str(self):
+        """Test that api_key is properly handled as SecretStr."""
+        settings = NvidiaSettings(api_key="secret-key")
+
+        # Should be SecretStr type
+        assert hasattr(settings.api_key, "get_secret_value")
+        assert settings.api_key.get_secret_value() == "secret-key"
+
+        # Should not expose the secret in string representation
+        str_repr = str(settings)
+        assert "secret-key" not in str_repr
+
+    def test_environment_variables(self, monkeypatch):
+        """Test that environment variables override defaults."""
+        monkeypatch.setenv("NVIDIA_API_KEY", "env-key")
+        monkeypatch.setenv("NVIDIA_CHAT_MODEL_ID", "env-chat")
+
+        settings = NvidiaSettings()
+
+        assert settings.api_key.get_secret_value() == "env-key"
+        assert settings.chat_model_id == "env-chat"
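
Finally, an end-to-end sketch of the structured-output path these tests exercise, combining `response_format` with the `nvext.guided_json` translation performed by `_handle_structured_output`. It assumes `NVIDIA_API_KEY` is set and that the target NIM endpoint supports guided JSON; it is illustrative, not part of the diff.

```python
# Sketch: structured output end to end (assumes NVIDIA_API_KEY is set).
import asyncio

from pydantic import BaseModel

from semantic_kernel.connectors.ai.nvidia import (
    NvidiaChatCompletion,
    NvidiaChatPromptExecutionSettings,
)
from semantic_kernel.contents.chat_history import ChatHistory


class Answer(BaseModel):
    name: str
    value: int


async def main() -> None:
    service = NvidiaChatCompletion(service_id="nvidia-chat")
    settings = NvidiaChatPromptExecutionSettings()
    # Converted by _handle_structured_output into extra_body["nvext"]["guided_json"].
    settings.response_format = Answer

    history = ChatHistory()
    history.add_user_message("Return an item named 'demo' with value 1 as JSON.")

    results = await service.get_chat_message_contents(history, settings)
    print(Answer.model_validate_json(results[0].content))


asyncio.run(main())
```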