diff --git a/python/samples/concepts/setup/ALL_SETTINGS.md b/python/samples/concepts/setup/ALL_SETTINGS.md
index ea7f911a5bb1..cb7ea81af670 100644
--- a/python/samples/concepts/setup/ALL_SETTINGS.md
+++ b/python/samples/concepts/setup/ALL_SETTINGS.md
@@ -30,7 +30,8 @@
| | [VertexAITextEmbedding](../../../semantic_kernel/connectors/ai/google/google_ai/services/google_ai_text_embedding.py) | project_id,
region,
embedding_model_id | VERTEX_AI_PROJECT_ID,
VERTEX_AI_REGION,
VERTEX_AI_EMBEDDING_MODEL_ID | Yes,
No,
Yes | |
| HuggingFace | [HuggingFaceTextCompletion](../../../semantic_kernel/connectors/ai/hugging_face/services/hf_text_completion.py) | ai_model_id | N/A | Yes | |
| | [HuggingFaceTextEmbedding](../../../semantic_kernel/connectors/ai/hugging_face/services/hf_text_embedding.py) | ai_model_id | N/A | Yes | |
-| NVIDIA NIM | [NvidiaTextEmbedding](../../../semantic_kernel/connectors/ai/nvidia/services/nvidia_text_embedding.py) | ai_model_id,
api_key,
base_url | NVIDIA_API_KEY,
NVIDIA_TEXT_EMBEDDING_MODEL_ID,
NVIDIA_BASE_URL | Yes | [NvidiaAISettings](../../../semantic_kernel/connectors/ai/nvidia/settings/nvidia_settings.py) |
+| NVIDIA NIM | [NvidiaChatCompletion](../../../semantic_kernel/connectors/ai/nvidia/services/nvidia_chat_completion.py) | ai_model_id,
api_key,
base_url | NVIDIA_CHAT_MODEL_ID,
NVIDIA_API_KEY,
NVIDIA_BASE_URL | Yes (default: meta/llama-3.1-8b-instruct),
Yes,
No | [NvidiaAISettings](../../../semantic_kernel/connectors/ai/nvidia/settings/nvidia_settings.py) |
+| | [NvidiaTextEmbedding](../../../semantic_kernel/connectors/ai/nvidia/services/nvidia_text_embedding.py) | ai_model_id,
api_key,
base_url | NVIDIA_API_KEY,
NVIDIA_EMBEDDING_MODEL_ID,
NVIDIA_BASE_URL | Yes | [NvidiaAISettings](../../../semantic_kernel/connectors/ai/nvidia/settings/nvidia_settings.py) |
| Mistral AI | [MistralAIChatCompletion](../../../semantic_kernel/connectors/ai/mistral_ai/services/mistral_ai_chat_completion.py) | ai_model_id,
api_key | MISTRALAI_CHAT_MODEL_ID,
MISTRALAI_API_KEY | Yes,
Yes | [MistralAISettings](../../../semantic_kernel/connectors/ai/mistral_ai/settings/mistral_ai_settings.py) |
| | [MistralAITextEmbedding](../../../semantic_kernel/connectors/ai/mistral_ai/services/mistral_ai_text_embedding.py) | ai_model_id,
api_key | MISTRALAI_EMBEDDING_MODEL_ID,
MISTRALAI_API_KEY | Yes,
Yes | |
| Ollama | [OllamaChatCompletion](../../../semantic_kernel/connectors/ai/ollama/services/ollama_chat_completion.py) | ai_model_id,
host | OLLAMA_CHAT_MODEL_ID,
OLLAMA_HOST | Yes,
No | [OllamaSettings](../../../semantic_kernel/connectors/ai/ollama/ollama_settings.py) |
diff --git a/python/samples/concepts/setup/chat_completion_services.py b/python/samples/concepts/setup/chat_completion_services.py
index 4baad81ea968..918e61c536db 100644
--- a/python/samples/concepts/setup/chat_completion_services.py
+++ b/python/samples/concepts/setup/chat_completion_services.py
@@ -28,6 +28,7 @@ class Services(str, Enum):
ONNX = "onnx"
VERTEX_AI = "vertex_ai"
DEEPSEEK = "deepseek"
+ NVIDIA = "nvidia"
service_id = "default"
@@ -64,6 +65,7 @@ def get_chat_completion_service_and_request_settings(
Services.ONNX: lambda: get_onnx_chat_completion_service_and_request_settings(),
Services.VERTEX_AI: lambda: get_vertex_ai_chat_completion_service_and_request_settings(),
Services.DEEPSEEK: lambda: get_deepseek_chat_completion_service_and_request_settings(),
+ Services.NVIDIA: lambda: get_nvidia_chat_completion_service_and_request_settings(),
}
# Call the appropriate lambda or function based on the service name
@@ -414,3 +416,27 @@ def get_deepseek_chat_completion_service_and_request_settings() -> tuple[
request_settings = OpenAIChatPromptExecutionSettings(service_id=service_id)
return chat_service, request_settings
+
+
+def get_nvidia_chat_completion_service_and_request_settings() -> tuple[
+ "ChatCompletionClientBase", "PromptExecutionSettings"
+]:
+ """Return NVIDIA chat completion service and request settings.
+
+    The service credentials can be read in 3 ways:
+ 1. Via the constructor
+ 2. Via the environment variables
+ 3. Via an environment file
+
+ The request settings control the behavior of the service. The default settings are sufficient to get started.
+ However, you can adjust the settings to suit your needs.
+ Note: Some of the settings are NOT meant to be set by the user.
+ Please refer to the Semantic Kernel Python documentation for more information:
+ https://learn.microsoft.com/en-us/python/api/semantic-kernel/semantic_kernel?view=semantic-kernel-python
+ """
+ from semantic_kernel.connectors.ai.nvidia import NvidiaChatCompletion, NvidiaChatPromptExecutionSettings
+
+ chat_service = NvidiaChatCompletion(service_id=service_id)
+ request_settings = NvidiaChatPromptExecutionSettings(service_id=service_id)
+
+ return chat_service, request_settings
diff --git a/python/semantic_kernel/connectors/ai/nvidia/README.md b/python/semantic_kernel/connectors/ai/nvidia/README.md
index 989446d06a05..0533f5aa4fa9 100644
--- a/python/semantic_kernel/connectors/ai/nvidia/README.md
+++ b/python/semantic_kernel/connectors/ai/nvidia/README.md
@@ -1,6 +1,6 @@
# semantic_kernel.connectors.ai.nvidia
-This connector enables integration with NVIDIA NIM API for text embeddings. It allows you to use NVIDIA's embedding models within the Semantic Kernel framework.
+This connector enables integration with NVIDIA NIM API for text embeddings and chat completion. It allows you to use NVIDIA's models within the Semantic Kernel framework.
## Quick start
@@ -13,6 +13,8 @@ kernel = sk.Kernel()
### Add NVIDIA text embedding service
You can provide your API key directly or through environment variables
```python
+from semantic_kernel.connectors.ai.nvidia import NvidiaTextEmbedding
+
embedding_service = NvidiaTextEmbedding(
ai_model_id="nvidia/nv-embedqa-e5-v5", # Default model if not specified
api_key="your-nvidia-api-key", # Can also use NVIDIA_API_KEY env variable
@@ -30,3 +32,35 @@ kernel.add_service(embedding_service)
texts = ["Hello, world!", "Semantic Kernel is awesome"]
embeddings = await kernel.get_service("nvidia-embeddings").generate_embeddings(texts)
```
+
+### Add NVIDIA chat completion service
+```python
+from semantic_kernel.connectors.ai.nvidia import NvidiaChatCompletion
+
+chat_service = NvidiaChatCompletion(
+ ai_model_id="meta/llama-3.1-8b-instruct", # Default model if not specified
+ api_key="your-nvidia-api-key", # Can also use NVIDIA_API_KEY env variable
+ service_id="nvidia-chat" # Optional service identifier
+)
+kernel.add_service(chat_service)
+```
+
+### Basic chat completion
+```python
+response = await kernel.invoke_prompt("Hello, how are you?")
+```
+
+### Using with Chat Completion Agent
+```python
+from semantic_kernel.agents import ChatCompletionAgent
+from semantic_kernel.connectors.ai.nvidia import NvidiaChatCompletion
+
+agent = ChatCompletionAgent(
+ service=NvidiaChatCompletion(),
+ name="SK-Assistant",
+ instructions="You are a helpful assistant.",
+)
+response = await agent.get_response(messages="Write a haiku about Semantic Kernel.")
+print(response.content)
+```
+
diff --git a/python/semantic_kernel/connectors/ai/nvidia/__init__.py b/python/semantic_kernel/connectors/ai/nvidia/__init__.py
index 7a2a6679996d..edaf3fbcd59c 100644
--- a/python/semantic_kernel/connectors/ai/nvidia/__init__.py
+++ b/python/semantic_kernel/connectors/ai/nvidia/__init__.py
@@ -1,13 +1,17 @@
# Copyright (c) Microsoft. All rights reserved.
from semantic_kernel.connectors.ai.nvidia.prompt_execution_settings.nvidia_prompt_execution_settings import (
+ NvidiaChatPromptExecutionSettings,
NvidiaEmbeddingPromptExecutionSettings,
NvidiaPromptExecutionSettings,
)
+from semantic_kernel.connectors.ai.nvidia.services.nvidia_chat_completion import NvidiaChatCompletion
from semantic_kernel.connectors.ai.nvidia.services.nvidia_text_embedding import NvidiaTextEmbedding
from semantic_kernel.connectors.ai.nvidia.settings.nvidia_settings import NvidiaSettings
__all__ = [
+ "NvidiaChatCompletion",
+ "NvidiaChatPromptExecutionSettings",
"NvidiaEmbeddingPromptExecutionSettings",
"NvidiaPromptExecutionSettings",
"NvidiaSettings",
diff --git a/python/semantic_kernel/connectors/ai/nvidia/prompt_execution_settings/nvidia_prompt_execution_settings.py b/python/semantic_kernel/connectors/ai/nvidia/prompt_execution_settings/nvidia_prompt_execution_settings.py
index 464db5aa1f3b..85f2d49dce05 100644
--- a/python/semantic_kernel/connectors/ai/nvidia/prompt_execution_settings/nvidia_prompt_execution_settings.py
+++ b/python/semantic_kernel/connectors/ai/nvidia/prompt_execution_settings/nvidia_prompt_execution_settings.py
@@ -2,7 +2,7 @@
from typing import Annotated, Any, Literal
-from pydantic import Field
+from pydantic import BaseModel, Field
from semantic_kernel.connectors.ai.prompt_execution_settings import PromptExecutionSettings
@@ -13,18 +13,6 @@ class NvidiaPromptExecutionSettings(PromptExecutionSettings):
format: Literal["json"] | None = None
options: dict[str, Any] | None = None
- def prepare_settings_dict(self, **kwargs) -> dict[str, Any]:
- """Prepare the settings as a dictionary for sending to the AI service.
-
- By default, this method excludes the service_id and extension_data fields.
- As well as any fields that are None.
- """
- return self.model_dump(
- exclude={"service_id", "extension_data", "structured_json_response", "input_type", "truncate"},
- exclude_none=True,
- by_alias=True,
- )
-
class NvidiaEmbeddingPromptExecutionSettings(NvidiaPromptExecutionSettings):
"""Settings for NVIDIA embedding prompt execution."""
@@ -39,3 +27,47 @@ class NvidiaEmbeddingPromptExecutionSettings(NvidiaPromptExecutionSettings):
extra_body: dict | None = None
timeout: float | None = None
dimensions: Annotated[int | None, Field(gt=0)] = None
+
+ def prepare_settings_dict(self, **kwargs) -> dict[str, Any]:
+ """Override only for embeddings to exclude input_type and truncate."""
+ return self.model_dump(
+ exclude={"service_id", "extension_data", "structured_json_response", "input_type", "truncate"},
+ exclude_none=True,
+ by_alias=True,
+ )
+
+
+class NvidiaChatPromptExecutionSettings(NvidiaPromptExecutionSettings):
+ """Settings for NVIDIA chat prompt execution."""
+
+ messages: list[dict[str, str]] | None = None
+ ai_model_id: Annotated[str | None, Field(serialization_alias="model")] = None
+ temperature: float | None = None
+ top_p: float | None = None
+ n: int | None = None
+ stream: bool = False
+ stop: str | list[str] | None = None
+ max_tokens: int | None = None
+ presence_penalty: float | None = None
+ frequency_penalty: float | None = None
+ logit_bias: dict[str, float] | None = None
+ user: str | None = None
+ tools: list[dict[str, Any]] | None = None
+ tool_choice: str | dict[str, Any] | None = None
+ response_format: (
+ dict[Literal["type"], Literal["text", "json_object"]] | dict[str, Any] | type[BaseModel] | type | None
+ ) = None
+ seed: int | None = None
+ extra_headers: dict | None = None
+ extra_body: dict | None = None
+ timeout: float | None = None
+ # NVIDIA-specific structured output support
+ nvext: dict[str, Any] | None = None
+
+ def prepare_settings_dict(self, **kwargs) -> dict[str, Any]:
+ """Override only for embeddings to exclude input_type and truncate."""
+ return self.model_dump(
+ exclude={"service_id", "extension_data", "structured_json_response", "response_format"},
+ exclude_none=True,
+ by_alias=True,
+ )
diff --git a/python/semantic_kernel/connectors/ai/nvidia/services/nvidia_chat_completion.py b/python/semantic_kernel/connectors/ai/nvidia/services/nvidia_chat_completion.py
new file mode 100644
index 000000000000..fdd2ae0dce0b
--- /dev/null
+++ b/python/semantic_kernel/connectors/ai/nvidia/services/nvidia_chat_completion.py
@@ -0,0 +1,313 @@
+# Copyright (c) Microsoft. All rights reserved.
+
+import logging
+import sys
+from collections.abc import AsyncGenerator
+from typing import Any, Literal
+
+from openai import AsyncOpenAI
+from openai.types.chat.chat_completion import ChatCompletion, Choice
+from openai.types.chat.chat_completion_chunk import ChatCompletionChunk
+from openai.types.chat.chat_completion_chunk import Choice as ChunkChoice
+from pydantic import ValidationError
+
+from semantic_kernel.connectors.ai.chat_completion_client_base import ChatCompletionClientBase
+from semantic_kernel.connectors.ai.completion_usage import CompletionUsage
+from semantic_kernel.connectors.ai.nvidia.prompt_execution_settings.nvidia_prompt_execution_settings import (
+ NvidiaChatPromptExecutionSettings,
+)
+from semantic_kernel.connectors.ai.nvidia.services.nvidia_handler import NvidiaHandler
+from semantic_kernel.connectors.ai.nvidia.services.nvidia_model_types import NvidiaModelTypes
+from semantic_kernel.connectors.ai.nvidia.settings.nvidia_settings import NvidiaSettings
+from semantic_kernel.connectors.ai.prompt_execution_settings import PromptExecutionSettings
+from semantic_kernel.contents import (
+ AuthorRole,
+ ChatMessageContent,
+ FinishReason,
+ FunctionCallContent,
+ StreamingChatMessageContent,
+ StreamingTextContent,
+ TextContent,
+)
+from semantic_kernel.contents.chat_history import ChatHistory
+from semantic_kernel.exceptions.service_exceptions import ServiceInitializationError
+from semantic_kernel.utils.feature_stage_decorator import experimental
+from semantic_kernel.utils.telemetry.model_diagnostics.decorators import (
+ trace_chat_completion,
+ trace_streaming_chat_completion,
+)
+
+if sys.version_info >= (3, 12):
+ from typing import override # pragma: no cover
+else:
+ from typing_extensions import override # pragma: no cover
+
+logger: logging.Logger = logging.getLogger(__name__)
+
+# Default NVIDIA chat model when none is specified
+DEFAULT_NVIDIA_CHAT_MODEL = "meta/llama-3.1-8b-instruct"
+
+
+@experimental
+class NvidiaChatCompletion(NvidiaHandler, ChatCompletionClientBase):
+ """NVIDIA Chat completion class.
+
+ This class does not support function calling. The SUPPORTS_FUNCTION_CALLING attribute
+ is set to False (inherited from the base class).
+ """
+
+ def __init__(
+ self,
+ ai_model_id: str | None = None,
+ api_key: str | None = None,
+ base_url: str | None = None,
+ service_id: str | None = None,
+ client: AsyncOpenAI | None = None,
+ env_file_path: str | None = None,
+ env_file_encoding: str | None = None,
+ instruction_role: Literal["system", "user", "assistant", "developer"] | None = None,
+ ) -> None:
+ """Initialize an NvidiaChatCompletion service.
+
+ Args:
+ ai_model_id (str): NVIDIA model name, see
+ https://docs.api.nvidia.com/nim/reference/
+ If not provided, defaults to DEFAULT_NVIDIA_CHAT_MODEL.
+ service_id (str | None): Service ID tied to the execution settings.
+ api_key (str | None): The optional API key to use. If provided will override,
+ the env vars or .env file value.
+ base_url (str | None): Custom API endpoint. (Optional)
+ client (Optional[AsyncOpenAI]): An existing client to use. (Optional)
+ env_file_path (str | None): Use the environment settings file as a fallback
+ to environment variables. (Optional)
+ env_file_encoding (str | None): The encoding of the environment settings file. (Optional)
+ instruction_role (Literal["system", "user", "assistant", "developer"] | None): The role to use for
+ 'instruction' messages. Defaults to "system". (Optional)
+ """
+ try:
+ nvidia_settings = NvidiaSettings(
+ api_key=api_key,
+ base_url=base_url,
+ chat_model_id=ai_model_id,
+ env_file_path=env_file_path,
+ env_file_encoding=env_file_encoding,
+ )
+ except ValidationError as ex:
+ raise ServiceInitializationError("Failed to create NVIDIA settings.", ex) from ex
+
+ if not client and not nvidia_settings.api_key:
+ raise ServiceInitializationError("The NVIDIA API key is required.")
+ if not nvidia_settings.chat_model_id:
+ # Default fallback model
+ nvidia_settings.chat_model_id = DEFAULT_NVIDIA_CHAT_MODEL
+ logger.warning(f"Default chat model set as: {nvidia_settings.chat_model_id}")
+
+ # Create client if not provided
+ if not client:
+ client = AsyncOpenAI(
+ api_key=nvidia_settings.api_key.get_secret_value() if nvidia_settings.api_key else None,
+ base_url=nvidia_settings.base_url,
+ )
+
+ super().__init__(
+ ai_model_id=nvidia_settings.chat_model_id,
+ api_key=nvidia_settings.api_key.get_secret_value() if nvidia_settings.api_key else None,
+ base_url=nvidia_settings.base_url,
+ service_id=service_id or "",
+ ai_model_type=NvidiaModelTypes.CHAT,
+ client=client,
+ instruction_role=instruction_role or "system",
+ )
+
+ @classmethod
+ def from_dict(cls: type["NvidiaChatCompletion"], settings: dict[str, Any]) -> "NvidiaChatCompletion":
+ """Initialize an NVIDIA service from a dictionary of settings.
+
+ Args:
+ settings: A dictionary of settings for the service.
+ """
+ return cls(
+ ai_model_id=settings.get("ai_model_id"),
+ api_key=settings.get("api_key"),
+ base_url=settings.get("base_url"),
+ service_id=settings.get("service_id"),
+ env_file_path=settings.get("env_file_path"),
+ )
+
+ @override
+ def get_prompt_execution_settings_class(self) -> type["PromptExecutionSettings"]:
+ return NvidiaChatPromptExecutionSettings
+
+ @override
+ @trace_chat_completion("nvidia")
+ async def _inner_get_chat_message_contents(
+ self,
+ chat_history: "ChatHistory",
+ settings: "PromptExecutionSettings",
+ ) -> list["ChatMessageContent"]:
+ if not isinstance(settings, NvidiaChatPromptExecutionSettings):
+ settings = self.get_prompt_execution_settings_from_settings(settings)
+ assert isinstance(settings, NvidiaChatPromptExecutionSettings) # nosec
+
+ settings.stream = False
+ settings.messages = self._prepare_chat_history_for_request(chat_history)
+ settings.ai_model_id = settings.ai_model_id or self.ai_model_id
+
+ # Handle structured output
+ self._handle_structured_output(settings)
+
+ response = await self._send_request(settings)
+ assert isinstance(response, ChatCompletion) # nosec
+ response_metadata = self._get_metadata_from_chat_response(response)
+ return [self._create_chat_message_content(response, choice, response_metadata) for choice in response.choices]
+
+ @override
+ @trace_streaming_chat_completion("nvidia")
+ async def _inner_get_streaming_chat_message_contents(
+ self,
+ chat_history: "ChatHistory",
+ settings: "PromptExecutionSettings",
+ function_invoke_attempt: int = 0,
+ ) -> AsyncGenerator[list["StreamingChatMessageContent"], Any]:
+ if not isinstance(settings, NvidiaChatPromptExecutionSettings):
+ settings = self.get_prompt_execution_settings_from_settings(settings)
+ assert isinstance(settings, NvidiaChatPromptExecutionSettings) # nosec
+
+ settings.stream = True
+ settings.messages = self._prepare_chat_history_for_request(chat_history)
+ settings.ai_model_id = settings.ai_model_id or self.ai_model_id
+
+ # Handle structured output
+ self._handle_structured_output(settings)
+
+ response = await self._send_request(settings)
+ assert isinstance(response, AsyncGenerator) # nosec
+
+ async for chunk in response:
+ if len(chunk.choices) == 0:
+ continue
+ chunk_metadata = self._get_metadata_from_chat_response(chunk)
+ yield [
+ self._create_streaming_chat_message_content(chunk, choice, chunk_metadata, function_invoke_attempt)
+ for choice in chunk.choices
+ ]
+
+ def _create_chat_message_content(
+ self, response: ChatCompletion, choice: Choice, response_metadata: dict[str, Any]
+ ) -> "ChatMessageContent":
+ """Create a chat message content object from a choice."""
+ metadata = self._get_metadata_from_chat_choice(choice)
+ metadata.update(response_metadata)
+
+ items: list[Any] = self._get_tool_calls_from_chat_choice(choice)
+ items.extend(self._get_function_call_from_chat_choice(choice))
+ if choice.message.content:
+ items.append(TextContent(text=choice.message.content))
+
+ return ChatMessageContent(
+ inner_content=response,
+ ai_model_id=self.ai_model_id,
+ metadata=metadata,
+ role=AuthorRole(choice.message.role),
+ items=items,
+ finish_reason=(FinishReason(choice.finish_reason) if choice.finish_reason else None),
+ )
+
+ def _create_streaming_chat_message_content(
+ self,
+ chunk: ChatCompletionChunk,
+ choice: ChunkChoice,
+ chunk_metadata: dict[str, Any],
+ function_invoke_attempt: int,
+ ) -> StreamingChatMessageContent:
+ """Create a streaming chat message content object from a choice."""
+ metadata = self._get_metadata_from_chat_choice(choice)
+ metadata.update(chunk_metadata)
+
+ items: list[Any] = self._get_tool_calls_from_chat_choice(choice)
+ items.extend(self._get_function_call_from_chat_choice(choice))
+ if choice.delta and choice.delta.content is not None:
+ items.append(StreamingTextContent(choice_index=choice.index, text=choice.delta.content))
+ return StreamingChatMessageContent(
+ choice_index=choice.index,
+ inner_content=chunk,
+ ai_model_id=self.ai_model_id,
+ metadata=metadata,
+ role=(AuthorRole(choice.delta.role) if choice.delta and choice.delta.role else AuthorRole.ASSISTANT),
+ finish_reason=(FinishReason(choice.finish_reason) if choice.finish_reason else None),
+ items=items,
+ function_invoke_attempt=function_invoke_attempt,
+ )
+
+ def _get_metadata_from_chat_response(self, response: ChatCompletion | ChatCompletionChunk) -> dict[str, Any]:
+ """Get metadata from a chat response."""
+ return {
+ "id": response.id,
+ "created": response.created,
+ "system_fingerprint": getattr(response, "system_fingerprint", None),
+ "usage": CompletionUsage.from_openai(response.usage) if response.usage is not None else None,
+ }
+
+ def _get_metadata_from_chat_choice(self, choice: Choice | ChunkChoice) -> dict[str, Any]:
+ """Get metadata from a chat choice."""
+ return {
+ "logprobs": getattr(choice, "logprobs", None),
+ }
+
+ def _get_tool_calls_from_chat_choice(self, choice: Choice | ChunkChoice) -> list[FunctionCallContent]:
+ """Get tool calls from a chat choice."""
+ content = choice.message if isinstance(choice, Choice) else choice.delta
+ if content and (tool_calls := getattr(content, "tool_calls", None)) is not None:
+ return [
+ FunctionCallContent(
+ id=tool.id,
+ index=getattr(tool, "index", None),
+ name=tool.function.name,
+ arguments=tool.function.arguments,
+ )
+ for tool in tool_calls
+ ]
+ return []
+
+ def _get_function_call_from_chat_choice(self, choice: Choice | ChunkChoice) -> list[FunctionCallContent]:
+ """Get function calls from a chat choice."""
+ content = choice.message if isinstance(choice, Choice) else choice.delta
+ if content and (function_call := getattr(content, "function_call", None)) is not None:
+ return [
+ FunctionCallContent(
+ id="",
+ name=function_call.name,
+ arguments=function_call.arguments,
+ )
+ ]
+ return []
+
+ def _handle_structured_output(self, request_settings: NvidiaChatPromptExecutionSettings) -> None:
+ """Handle structured output for NVIDIA models using nvext parameter."""
+ response_format = getattr(request_settings, "response_format", None)
+ if response_format:
+ # Convert Pydantic model to JSON schema for NVIDIA's guided_json
+ if hasattr(response_format, "model_json_schema"):
+ # It's a Pydantic model
+ schema = response_format.model_json_schema()
+ if not request_settings.extra_body:
+ request_settings.extra_body = {}
+ request_settings.extra_body["nvext"] = {"guided_json": schema}
+ elif isinstance(response_format, dict):
+ # It's already a dict, use it directly
+ if not request_settings.extra_body:
+ request_settings.extra_body = {}
+ request_settings.extra_body["nvext"] = {"guided_json": response_format}
+
+ def _prepare_chat_history_for_request(
+ self,
+ chat_history: ChatHistory,
+ role_key: str = "role",
+ content_key: str = "content",
+ ) -> list[dict[str, str]]:
+ """Prepare chat history for request."""
+ messages = []
+ for message in chat_history.messages:
+ message_dict = {role_key: message.role.value, content_key: message.content}
+ messages.append(message_dict)
+ return messages
diff --git a/python/semantic_kernel/connectors/ai/nvidia/services/nvidia_handler.py b/python/semantic_kernel/connectors/ai/nvidia/services/nvidia_handler.py
index 4aed491c9e52..0f7efff01703 100644
--- a/python/semantic_kernel/connectors/ai/nvidia/services/nvidia_handler.py
+++ b/python/semantic_kernel/connectors/ai/nvidia/services/nvidia_handler.py
@@ -5,10 +5,14 @@
from typing import Any, ClassVar, Union
from openai import AsyncOpenAI, AsyncStream
-from openai.types import CreateEmbeddingResponse
+from openai.types.chat.chat_completion import ChatCompletion
+from openai.types.chat.chat_completion_chunk import ChatCompletionChunk
+from openai.types.completion import Completion
+from openai.types.create_embedding_response import CreateEmbeddingResponse
-from semantic_kernel.connectors.ai.nvidia import (
- NvidiaPromptExecutionSettings,
+from semantic_kernel.connectors.ai.nvidia.prompt_execution_settings.nvidia_prompt_execution_settings import (
+ NvidiaChatPromptExecutionSettings,
+ NvidiaEmbeddingPromptExecutionSettings,
)
from semantic_kernel.connectors.ai.nvidia.services.nvidia_model_types import NvidiaModelTypes
from semantic_kernel.connectors.ai.prompt_execution_settings import PromptExecutionSettings
@@ -18,7 +22,7 @@
logger: logging.Logger = logging.getLogger(__name__)
-RESPONSE_TYPE = Union[list[Any],]
+RESPONSE_TYPE = Union[list[Any], ChatCompletion, Completion, AsyncStream[Any]]
class NvidiaHandler(KernelBaseModel, ABC):
@@ -26,22 +30,23 @@ class NvidiaHandler(KernelBaseModel, ABC):
MODEL_PROVIDER_NAME: ClassVar[str] = "nvidia"
client: AsyncOpenAI
- ai_model_type: NvidiaModelTypes = (
- NvidiaModelTypes.EMBEDDING
- ) # TODO: revert this to chat after adding support for chat-compl # noqa: TD002
- prompt_tokens: int = 0
+ ai_model_type: NvidiaModelTypes = NvidiaModelTypes.CHAT
completion_tokens: int = 0
total_tokens: int = 0
+ prompt_tokens: int = 0
async def _send_request(self, settings: PromptExecutionSettings) -> RESPONSE_TYPE:
"""Send a request to the Nvidia API."""
if self.ai_model_type == NvidiaModelTypes.EMBEDDING:
- assert isinstance(settings, NvidiaPromptExecutionSettings) # nosec
+ assert isinstance(settings, NvidiaEmbeddingPromptExecutionSettings) # nosec
return await self._send_embedding_request(settings)
+ if self.ai_model_type == NvidiaModelTypes.CHAT:
+ assert isinstance(settings, NvidiaChatPromptExecutionSettings) # nosec
+ return await self._send_chat_completion_request(settings)
raise NotImplementedError(f"Model type {self.ai_model_type} is not supported")
- async def _send_embedding_request(self, settings: NvidiaPromptExecutionSettings) -> list[Any]:
+ async def _send_embedding_request(self, settings: NvidiaEmbeddingPromptExecutionSettings) -> list[Any]:
"""Send a request to the OpenAI embeddings endpoint."""
try:
# unsupported parameters are internally excluded from main dict and added to extra_body
@@ -55,9 +60,35 @@ async def _send_embedding_request(self, settings: NvidiaPromptExecutionSettings)
ex,
) from ex
+ async def _send_chat_completion_request(
+ self, settings: NvidiaChatPromptExecutionSettings
+ ) -> ChatCompletion | AsyncStream[Any]:
+ """Send a request to the NVIDIA chat completion endpoint."""
+ try:
+ settings_dict = settings.prepare_settings_dict()
+
+ # Handle structured output if nvext is present in extra_body
+ if settings.extra_body and "nvext" in settings.extra_body:
+ if "extra_body" not in settings_dict:
+ settings_dict["extra_body"] = {}
+ settings_dict["extra_body"]["nvext"] = settings.extra_body["nvext"]
+
+ response = await self.client.chat.completions.create(**settings_dict)
+ self.store_usage(response)
+ return response
+ except Exception as ex:
+ raise ServiceResponseException(
+ f"{type(self)} service failed to complete the chat",
+ ex,
+ ) from ex
+
def store_usage(
self,
- response: CreateEmbeddingResponse,
+ response: ChatCompletion
+ | Completion
+ | AsyncStream[ChatCompletionChunk]
+ | AsyncStream[Completion]
+ | CreateEmbeddingResponse,
):
"""Store the usage information from the response."""
if not isinstance(response, AsyncStream) and response.usage:
diff --git a/python/semantic_kernel/connectors/ai/nvidia/services/nvidia_model_types.py b/python/semantic_kernel/connectors/ai/nvidia/services/nvidia_model_types.py
index 4e3e12c6b71b..e9222f36a6dd 100644
--- a/python/semantic_kernel/connectors/ai/nvidia/services/nvidia_model_types.py
+++ b/python/semantic_kernel/connectors/ai/nvidia/services/nvidia_model_types.py
@@ -4,6 +4,7 @@
class NvidiaModelTypes(Enum):
- """Nvidia model types, can be text, chat or embedding."""
+ """Nvidia model types, can be text, chat, or embedding."""
EMBEDDING = "embedding"
+ CHAT = "chat"
diff --git a/python/semantic_kernel/connectors/ai/nvidia/services/nvidia_text_embedding.py b/python/semantic_kernel/connectors/ai/nvidia/services/nvidia_text_embedding.py
index 1d828963d896..aa0da4d51859 100644
--- a/python/semantic_kernel/connectors/ai/nvidia/services/nvidia_text_embedding.py
+++ b/python/semantic_kernel/connectors/ai/nvidia/services/nvidia_text_embedding.py
@@ -15,7 +15,7 @@
from openai import AsyncOpenAI
from pydantic import ValidationError
-from semantic_kernel.connectors.ai.embeddings.embedding_generator_base import EmbeddingGeneratorBase
+from semantic_kernel.connectors.ai.embedding_generator_base import EmbeddingGeneratorBase
from semantic_kernel.connectors.ai.nvidia.prompt_execution_settings.nvidia_prompt_execution_settings import (
NvidiaEmbeddingPromptExecutionSettings,
)
@@ -73,7 +73,10 @@ def __init__(
if not nvidia_settings.api_key:
logger.warning("API_KEY is missing, inference may fail.")
if not client:
- client = AsyncOpenAI(api_key=nvidia_settings.api_key.get_secret_value(), base_url=nvidia_settings.base_url)
+ client = AsyncOpenAI(
+ api_key=nvidia_settings.api_key.get_secret_value() if nvidia_settings.api_key else None,
+ base_url=nvidia_settings.base_url,
+ )
super().__init__(
ai_model_id=nvidia_settings.embedding_model_id,
api_key=nvidia_settings.api_key.get_secret_value() if nvidia_settings.api_key else None,
diff --git a/python/semantic_kernel/connectors/ai/nvidia/settings/nvidia_settings.py b/python/semantic_kernel/connectors/ai/nvidia/settings/nvidia_settings.py
index fb132df95ab1..0c7eb37641d3 100644
--- a/python/semantic_kernel/connectors/ai/nvidia/settings/nvidia_settings.py
+++ b/python/semantic_kernel/connectors/ai/nvidia/settings/nvidia_settings.py
@@ -24,11 +24,14 @@ class NvidiaSettings(KernelBaseSettings):
(Env var NVIDIA_BASE_URL)
- embedding_model_id: str | None - The NVIDIA embedding model ID to use, for example, nvidia/nv-embed-v1.
(Env var NVIDIA_EMBEDDING_MODEL_ID)
+ - chat_model_id: str | None - The NVIDIA chat model ID to use.
+ (Env var NVIDIA_CHAT_MODEL_ID)
- env_file_path: if provided, the .env settings are read from this file path location
"""
env_prefix: ClassVar[str] = "NVIDIA_"
- api_key: SecretStr
+ api_key: SecretStr | None = None
base_url: str = "https://integrate.api.nvidia.com/v1"
- embedding_model_id: str | None
+ embedding_model_id: str | None = None
+ chat_model_id: str | None = None
diff --git a/python/tests/unit/connectors/ai/nvidia/prompt_execution_settings/test_nvidia_prompt_execution_settings.py b/python/tests/unit/connectors/ai/nvidia/prompt_execution_settings/test_nvidia_prompt_execution_settings.py
new file mode 100644
index 000000000000..8baf734bc3a5
--- /dev/null
+++ b/python/tests/unit/connectors/ai/nvidia/prompt_execution_settings/test_nvidia_prompt_execution_settings.py
@@ -0,0 +1,101 @@
+# Copyright (c) Microsoft. All rights reserved.
+
+import pytest
+from pydantic import BaseModel, ValidationError
+
+from semantic_kernel.connectors.ai.nvidia.prompt_execution_settings.nvidia_prompt_execution_settings import (
+ NvidiaChatPromptExecutionSettings,
+ NvidiaEmbeddingPromptExecutionSettings,
+ NvidiaPromptExecutionSettings,
+)
+
+
+class TestNvidiaPromptExecutionSettings:
+ """Test cases for NvidiaPromptExecutionSettings."""
+
+ def test_init_with_defaults(self):
+ """Test initialization with default values."""
+ settings = NvidiaPromptExecutionSettings()
+ assert settings.format is None
+ assert settings.options is None
+
+ def test_init_with_values(self):
+ """Test initialization with specific values."""
+ settings = NvidiaPromptExecutionSettings(
+ format="json",
+ options={"key": "value"},
+ )
+ assert settings.format == "json"
+ assert settings.options == {"key": "value"}
+
+ def test_validation_format_values(self):
+ """Test format validation values."""
+ # Valid values
+ settings = NvidiaPromptExecutionSettings(format="json")
+ assert settings.format == "json"
+
+
+class TestNvidiaChatPromptExecutionSettings:
+ """Test cases for NvidiaChatPromptExecutionSettings."""
+
+ def test_init_with_defaults(self):
+ """Test initialization with default values."""
+ settings = NvidiaChatPromptExecutionSettings()
+ assert settings.messages is None
+ assert settings.response_format is None
+
+ def test_response_format_with_pydantic_model(self):
+ """Test response_format with Pydantic model."""
+
+ class TestModel(BaseModel):
+ name: str
+ value: int
+
+ settings = NvidiaChatPromptExecutionSettings(response_format=TestModel)
+
+ assert settings.response_format == TestModel
+
+ def test_response_format_with_dict(self):
+ """Test response_format with dictionary."""
+ settings = NvidiaChatPromptExecutionSettings(response_format={"type": "json_object"})
+
+ assert settings.response_format == {"type": "json_object"}
+
+
+class TestNvidiaEmbeddingPromptExecutionSettings:
+ """Test cases for NvidiaEmbeddingPromptExecutionSettings."""
+
+ def test_init_with_defaults(self):
+ """Test initialization with default values."""
+ settings = NvidiaEmbeddingPromptExecutionSettings()
+ assert settings.input is None
+ assert settings.encoding_format == "float"
+ assert settings.input_type == "query"
+ assert settings.truncate == "NONE"
+
+ def test_init_with_values(self):
+ """Test initialization with specific values."""
+ settings = NvidiaEmbeddingPromptExecutionSettings(
+ input=["hello", "world"],
+ encoding_format="base64",
+ input_type="passage",
+ truncate="START",
+ )
+
+ assert settings.input == ["hello", "world"]
+ assert settings.encoding_format == "base64"
+ assert settings.input_type == "passage"
+ assert settings.truncate == "START"
+
+ def test_validation_encoding_format(self):
+ """Test encoding_format validation."""
+ # Valid values
+ settings = NvidiaEmbeddingPromptExecutionSettings(encoding_format="float")
+ assert settings.encoding_format == "float"
+
+ settings = NvidiaEmbeddingPromptExecutionSettings(encoding_format="base64")
+ assert settings.encoding_format == "base64"
+
+ # Invalid values
+ with pytest.raises(ValidationError):
+ NvidiaEmbeddingPromptExecutionSettings(encoding_format="invalid")
diff --git a/python/tests/unit/connectors/ai/nvidia/services/test_nvidia_chat_completion.py b/python/tests/unit/connectors/ai/nvidia/services/test_nvidia_chat_completion.py
new file mode 100644
index 000000000000..a4008f9a1c05
--- /dev/null
+++ b/python/tests/unit/connectors/ai/nvidia/services/test_nvidia_chat_completion.py
@@ -0,0 +1,151 @@
+# Copyright (c) Microsoft. All rights reserved.
+
+from unittest.mock import AsyncMock, patch
+
+import pytest
+from openai.resources.chat.completions import AsyncCompletions
+from openai.types.chat import ChatCompletion, ChatCompletionMessage
+from openai.types.chat.chat_completion import Choice
+from openai.types.completion_usage import CompletionUsage
+from pydantic import BaseModel
+
+from semantic_kernel.connectors.ai.nvidia import NvidiaChatCompletion
+from semantic_kernel.connectors.ai.nvidia.prompt_execution_settings.nvidia_prompt_execution_settings import (
+ NvidiaChatPromptExecutionSettings,
+)
+from semantic_kernel.connectors.ai.nvidia.services.nvidia_chat_completion import DEFAULT_NVIDIA_CHAT_MODEL
+from semantic_kernel.contents import ChatHistory
+from semantic_kernel.exceptions import ServiceInitializationError, ServiceResponseException
+
+
+@pytest.fixture
+def nvidia_unit_test_env(monkeypatch, exclude_list, override_env_param_dict):
+ """Fixture to set environment variables for NvidiaChatCompletion."""
+ if exclude_list is None:
+ exclude_list = []
+
+ if override_env_param_dict is None:
+ override_env_param_dict = {}
+
+ env_vars = {"NVIDIA_API_KEY": "test_api_key", "NVIDIA_CHAT_MODEL_ID": "meta/llama-3.1-8b-instruct"}
+
+ env_vars.update(override_env_param_dict)
+
+ for key, value in env_vars.items():
+ if key not in exclude_list:
+ monkeypatch.setenv(key, value)
+ else:
+ monkeypatch.delenv(key, raising=False)
+
+ return env_vars
+
+
+def _create_mock_chat_completion(content: str = "Hello!") -> ChatCompletion:
+ """Helper function to create a mock ChatCompletion response."""
+ message = ChatCompletionMessage(role="assistant", content=content)
+ choice = Choice(
+ finish_reason="stop",
+ index=0,
+ message=message,
+ )
+ usage = CompletionUsage(completion_tokens=20, prompt_tokens=10, total_tokens=30)
+ return ChatCompletion(
+ id="test-id",
+ choices=[choice],
+ created=1234567890,
+ model="meta/llama-3.1-8b-instruct",
+ object="chat.completion",
+ usage=usage,
+ )
+
+
+class TestNvidiaChatCompletion:
+ """Test cases for NvidiaChatCompletion."""
+
+ def test_init_with_defaults(self, nvidia_unit_test_env):
+ """Test initialization with default values."""
+ service = NvidiaChatCompletion()
+ assert service.ai_model_id == nvidia_unit_test_env["NVIDIA_CHAT_MODEL_ID"]
+
+ def test_get_prompt_execution_settings_class(self, nvidia_unit_test_env):
+ """Test getting the prompt execution settings class."""
+ service = NvidiaChatCompletion()
+ from semantic_kernel.connectors.ai.nvidia.prompt_execution_settings.nvidia_prompt_execution_settings import (
+ NvidiaChatPromptExecutionSettings,
+ )
+
+ assert service.get_prompt_execution_settings_class() == NvidiaChatPromptExecutionSettings
+
+ @pytest.mark.parametrize("exclude_list", [["NVIDIA_API_KEY"]], indirect=True)
+ def test_init_with_empty_api_key(self, nvidia_unit_test_env):
+ """Test initialization fails with empty API key."""
+ with pytest.raises(ServiceInitializationError):
+ NvidiaChatCompletion()
+
+ @pytest.mark.parametrize("exclude_list", [["NVIDIA_CHAT_MODEL_ID"]], indirect=True)
+ def test_init_with_empty_model_id(self, nvidia_unit_test_env):
+ """Test initialization with empty model ID uses default."""
+ service = NvidiaChatCompletion()
+ assert service.ai_model_id == DEFAULT_NVIDIA_CHAT_MODEL
+
+ def test_init_with_custom_model_id(self, nvidia_unit_test_env):
+ """Test initialization with custom model ID."""
+ custom_model = "custom/nvidia-model"
+ service = NvidiaChatCompletion(ai_model_id=custom_model)
+ assert service.ai_model_id == custom_model
+
+ @pytest.mark.asyncio
+ @patch.object(AsyncCompletions, "create", new_callable=AsyncMock)
+ async def test_get_chat_message_contents(self, mock_create, nvidia_unit_test_env):
+ """Test basic chat completion."""
+ mock_create.return_value = _create_mock_chat_completion("Hello!")
+
+ service = NvidiaChatCompletion()
+ chat_history = ChatHistory()
+ chat_history.add_user_message("Hello")
+ settings = NvidiaChatPromptExecutionSettings()
+
+ result = await service.get_chat_message_contents(chat_history, settings)
+
+ assert len(result) == 1
+ assert result[0].content == "Hello!"
+
+ @pytest.mark.asyncio
+ @patch.object(AsyncCompletions, "create", new_callable=AsyncMock)
+ async def test_structured_output_with_pydantic_model(self, mock_create, nvidia_unit_test_env):
+ """Test structured output with Pydantic model."""
+
+ # Define test model
+ class TestModel(BaseModel):
+ name: str
+ value: int
+
+ mock_create.return_value = _create_mock_chat_completion('{"name": "test", "value": 42}')
+
+ service = NvidiaChatCompletion()
+ chat_history = ChatHistory()
+ chat_history.add_user_message("Give me structured data")
+ settings = NvidiaChatPromptExecutionSettings()
+ settings.response_format = TestModel
+
+ await service.get_chat_message_contents(chat_history, settings)
+
+ # Verify nvext was passed
+ call_args = mock_create.call_args[1]
+ assert "extra_body" in call_args
+ assert "nvext" in call_args["extra_body"]
+ assert "guided_json" in call_args["extra_body"]["nvext"]
+
+ @pytest.mark.asyncio
+ @patch.object(AsyncCompletions, "create", new_callable=AsyncMock)
+ async def test_error_handling(self, mock_create, nvidia_unit_test_env):
+ """Test error handling."""
+ mock_create.side_effect = Exception("API Error")
+
+ service = NvidiaChatCompletion()
+ chat_history = ChatHistory()
+ chat_history.add_user_message("Hello")
+ settings = NvidiaChatPromptExecutionSettings()
+
+ with pytest.raises(ServiceResponseException):
+ await service.get_chat_message_contents(chat_history, settings)
diff --git a/python/tests/unit/connectors/ai/nvidia/services/test_nvidia_handler.py b/python/tests/unit/connectors/ai/nvidia/services/test_nvidia_handler.py
new file mode 100644
index 000000000000..f6a0bfefd247
--- /dev/null
+++ b/python/tests/unit/connectors/ai/nvidia/services/test_nvidia_handler.py
@@ -0,0 +1,152 @@
+# Copyright (c) Microsoft. All rights reserved.
+
+from unittest.mock import AsyncMock, MagicMock
+
+import pytest
+from openai import AsyncOpenAI
+
+from semantic_kernel.connectors.ai.nvidia.prompt_execution_settings.nvidia_prompt_execution_settings import (
+ NvidiaChatPromptExecutionSettings,
+ NvidiaEmbeddingPromptExecutionSettings,
+)
+from semantic_kernel.connectors.ai.nvidia.services.nvidia_handler import NvidiaHandler
+from semantic_kernel.connectors.ai.nvidia.services.nvidia_model_types import NvidiaModelTypes
+
+
+@pytest.fixture
+def mock_openai_client():
+ """Create a mock OpenAI client."""
+ return AsyncMock(spec=AsyncOpenAI)
+
+
+@pytest.fixture
+def nvidia_handler(mock_openai_client):
+ """Create a NvidiaHandler instance with mocked client."""
+ return NvidiaHandler(
+ client=mock_openai_client,
+ ai_model_type=NvidiaModelTypes.CHAT,
+ ai_model_id="test-model",
+ api_key="test-key",
+ )
+
+
+class TestNvidiaHandler:
+ """Test cases for NvidiaHandler."""
+
+ def test_init(self, mock_openai_client):
+ """Test initialization."""
+ handler = NvidiaHandler(
+ client=mock_openai_client,
+ ai_model_type=NvidiaModelTypes.CHAT,
+ )
+
+ assert handler.client == mock_openai_client
+ assert handler.ai_model_type == NvidiaModelTypes.CHAT
+ assert handler.MODEL_PROVIDER_NAME == "nvidia"
+
+ @pytest.mark.asyncio
+ async def test_send_chat_completion_request(self, nvidia_handler, mock_openai_client):
+ """Test sending chat completion request."""
+ # Mock the response
+ mock_response = MagicMock()
+ mock_response.choices = [
+ MagicMock(
+ message=MagicMock(role="assistant", content="Hello!"),
+ finish_reason="stop",
+ )
+ ]
+ mock_response.usage = MagicMock(prompt_tokens=10, completion_tokens=20, total_tokens=30)
+ mock_openai_client.chat.completions.create = AsyncMock(return_value=mock_response)
+
+ # Create settings
+ settings = NvidiaChatPromptExecutionSettings(
+ messages=[{"role": "user", "content": "Hello"}],
+ model="test-model",
+ )
+
+ # Test the method
+ result = await nvidia_handler._send_chat_completion_request(settings)
+ assert result == mock_response
+
+ # Verify usage was stored
+ assert nvidia_handler.prompt_tokens == 10
+ assert nvidia_handler.completion_tokens == 20
+ assert nvidia_handler.total_tokens == 30
+
+ @pytest.mark.asyncio
+ async def test_send_chat_completion_request_with_nvext(self, nvidia_handler, mock_openai_client):
+ """Test sending chat completion request with nvext parameter."""
+ # Mock the response
+ mock_response = MagicMock()
+ mock_response.choices = [
+ MagicMock(
+ message=MagicMock(role="assistant", content='{"result": "success"}'),
+ finish_reason="stop",
+ )
+ ]
+ mock_response.usage = MagicMock(prompt_tokens=10, completion_tokens=20, total_tokens=30)
+ mock_openai_client.chat.completions.create = AsyncMock(return_value=mock_response)
+
+ # Create settings with nvext
+ settings = NvidiaChatPromptExecutionSettings(
+ messages=[{"role": "user", "content": "Give me JSON"}],
+ model="test-model",
+ extra_body={"nvext": {"guided_json": {"type": "object"}}},
+ )
+
+ # Test the method
+ result = await nvidia_handler._send_chat_completion_request(settings)
+ assert result == mock_response
+
+ # Verify the client was called with nvext in extra_body
+ call_args = mock_openai_client.chat.completions.create.call_args[1]
+ assert "extra_body" in call_args
+ assert "nvext" in call_args["extra_body"]
+ assert call_args["extra_body"]["nvext"] == {"guided_json": {"type": "object"}}
+
+ @pytest.mark.asyncio
+ async def test_send_embedding_request(self, mock_openai_client):
+ """Test sending embedding request."""
+ handler = NvidiaHandler(
+ client=mock_openai_client,
+ ai_model_type=NvidiaModelTypes.EMBEDDING,
+ ai_model_id="test-model",
+ )
+
+ # Mock the response
+ mock_response = MagicMock()
+ mock_response.data = [
+ MagicMock(embedding=[0.1, 0.2, 0.3]),
+ MagicMock(embedding=[0.4, 0.5, 0.6]),
+ ]
+ mock_response.usage = MagicMock(prompt_tokens=10, total_tokens=10)
+ mock_openai_client.embeddings.create = AsyncMock(return_value=mock_response)
+
+ # Create settings
+ settings = NvidiaEmbeddingPromptExecutionSettings(
+ input=["hello", "world"],
+ model="test-model",
+ )
+
+ # Test the method
+ result = await handler._send_embedding_request(settings)
+ assert result == [[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]]
+
+ @pytest.mark.asyncio
+ async def test_send_request_unsupported_model_type(self, mock_openai_client):
+ """Test send_request with unsupported model type."""
+        # Create a handler with a valid model type, then inject an unsupported one below
+ handler = NvidiaHandler(
+ client=mock_openai_client,
+ ai_model_type=NvidiaModelTypes.CHAT,
+ )
+ # Manually set the attribute to bypass Pydantic validation
+ object.__setattr__(handler, "ai_model_type", "UNSUPPORTED")
+
+ settings = NvidiaChatPromptExecutionSettings(
+ messages=[{"role": "user", "content": "Hello"}],
+ model="test-model",
+ )
+
+ with pytest.raises(NotImplementedError, match="Model type UNSUPPORTED is not supported"):
+ await handler._send_request(settings)
diff --git a/python/tests/unit/connectors/ai/nvidia/settings/test_nvidia_settings.py b/python/tests/unit/connectors/ai/nvidia/settings/test_nvidia_settings.py
new file mode 100644
index 000000000000..4ac818ad480d
--- /dev/null
+++ b/python/tests/unit/connectors/ai/nvidia/settings/test_nvidia_settings.py
@@ -0,0 +1,56 @@
+# Copyright (c) Microsoft. All rights reserved.
+
+
+from semantic_kernel.connectors.ai.nvidia.settings.nvidia_settings import NvidiaSettings
+
+
+class TestNvidiaSettings:
+ """Test cases for NvidiaSettings."""
+
+ def test_init_with_defaults(self):
+ """Test initialization with default values."""
+ settings = NvidiaSettings()
+ assert settings.api_key is None
+ assert settings.base_url == "https://integrate.api.nvidia.com/v1"
+ assert settings.embedding_model_id is None
+ assert settings.chat_model_id is None
+
+ def test_init_with_values(self):
+ """Test initialization with specific values."""
+ settings = NvidiaSettings(
+ api_key="test-api-key",
+ base_url="https://custom.nvidia.com/v1",
+ embedding_model_id="test-embedding-model",
+ chat_model_id="test-chat-model",
+ )
+
+ assert settings.api_key.get_secret_value() == "test-api-key"
+ assert settings.base_url == "https://custom.nvidia.com/v1"
+ assert settings.embedding_model_id == "test-embedding-model"
+ assert settings.chat_model_id == "test-chat-model"
+
+ def test_env_prefix(self):
+ """Test environment variable prefix."""
+ assert NvidiaSettings.env_prefix == "NVIDIA_"
+
+ def test_api_key_secret_str(self):
+ """Test that api_key is properly handled as SecretStr."""
+ settings = NvidiaSettings(api_key="secret-key")
+
+ # Should be SecretStr type
+ assert hasattr(settings.api_key, "get_secret_value")
+ assert settings.api_key.get_secret_value() == "secret-key"
+
+ # Should not expose the secret in string representation
+ str_repr = str(settings)
+ assert "secret-key" not in str_repr
+
+ def test_environment_variables(self, monkeypatch):
+ """Test that environment variables override defaults."""
+ monkeypatch.setenv("NVIDIA_API_KEY", "env-key")
+ monkeypatch.setenv("NVIDIA_CHAT_MODEL_ID", "env-chat")
+
+ settings = NvidiaSettings()
+
+ assert settings.api_key.get_secret_value() == "env-key"
+ assert settings.chat_model_id == "env-chat"