From 4562be365146891d0c9eb75efb89971e49d78306 Mon Sep 17 00:00:00 2001
From: Soumili Nandi
Date: Tue, 15 Jul 2025 19:35:24 -0700
Subject: [PATCH 01/11] adding files for chat models

---
 .../connectors/ai/nvidia/__init__.py          |   4 +
 .../nvidia_prompt_execution_settings.py       |  53 +++-
 .../nvidia/services/nvidia_chat_completion.py | 284 ++++++++++++++++++
 .../ai/nvidia/services/nvidia_handler.py      |  51 +++-
 .../ai/nvidia/services/nvidia_model_types.py  |   4 +-
 .../nvidia/services/nvidia_text_embedding.py  |   2 +-
 .../ai/nvidia/settings/nvidia_settings.py     |   5 +-
 7 files changed, 380 insertions(+), 23 deletions(-)
 create mode 100644 python/semantic_kernel/connectors/ai/nvidia/services/nvidia_chat_completion.py

diff --git a/python/semantic_kernel/connectors/ai/nvidia/__init__.py b/python/semantic_kernel/connectors/ai/nvidia/__init__.py
index 7a2a6679996d..edaf3fbcd59c 100644
--- a/python/semantic_kernel/connectors/ai/nvidia/__init__.py
+++ b/python/semantic_kernel/connectors/ai/nvidia/__init__.py
@@ -1,13 +1,17 @@
 # Copyright (c) Microsoft. All rights reserved.
 
 from semantic_kernel.connectors.ai.nvidia.prompt_execution_settings.nvidia_prompt_execution_settings import (
+    NvidiaChatPromptExecutionSettings,
     NvidiaEmbeddingPromptExecutionSettings,
     NvidiaPromptExecutionSettings,
 )
+from semantic_kernel.connectors.ai.nvidia.services.nvidia_chat_completion import NvidiaChatCompletion
 from semantic_kernel.connectors.ai.nvidia.services.nvidia_text_embedding import NvidiaTextEmbedding
 from semantic_kernel.connectors.ai.nvidia.settings.nvidia_settings import NvidiaSettings
 
 __all__ = [
+    "NvidiaChatCompletion",
+    "NvidiaChatPromptExecutionSettings",
     "NvidiaEmbeddingPromptExecutionSettings",
     "NvidiaPromptExecutionSettings",
     "NvidiaSettings",
diff --git a/python/semantic_kernel/connectors/ai/nvidia/prompt_execution_settings/nvidia_prompt_execution_settings.py b/python/semantic_kernel/connectors/ai/nvidia/prompt_execution_settings/nvidia_prompt_execution_settings.py
index 464db5aa1f3b..159473e7e277 100644
--- a/python/semantic_kernel/connectors/ai/nvidia/prompt_execution_settings/nvidia_prompt_execution_settings.py
+++ b/python/semantic_kernel/connectors/ai/nvidia/prompt_execution_settings/nvidia_prompt_execution_settings.py
@@ -13,18 +13,6 @@ class NvidiaPromptExecutionSettings(PromptExecutionSettings):
     format: Literal["json"] | None = None
     options: dict[str, Any] | None = None
 
-    def prepare_settings_dict(self, **kwargs) -> dict[str, Any]:
-        """Prepare the settings as a dictionary for sending to the AI service.
-
-        By default, this method excludes the service_id and extension_data fields.
-        As well as any fields that are None.
- """ - return self.model_dump( - exclude={"service_id", "extension_data", "structured_json_response", "input_type", "truncate"}, - exclude_none=True, - by_alias=True, - ) - class NvidiaEmbeddingPromptExecutionSettings(NvidiaPromptExecutionSettings): """Settings for NVIDIA embedding prompt execution.""" @@ -39,3 +27,44 @@ class NvidiaEmbeddingPromptExecutionSettings(NvidiaPromptExecutionSettings): extra_body: dict | None = None timeout: float | None = None dimensions: Annotated[int | None, Field(gt=0)] = None + + def prepare_settings_dict(self, **kwargs) -> dict[str, Any]: + """Override only for embeddings to exclude input_type and truncate.""" + return self.model_dump( + exclude={"service_id", "extension_data", "structured_json_response", "input_type", "truncate"}, + exclude_none=True, + by_alias=True, + ) + + +class NvidiaChatPromptExecutionSettings(NvidiaPromptExecutionSettings): + """Settings for NVIDIA chat prompt execution.""" + + messages: list[dict[str, str]] | None = None + ai_model_id: Annotated[str | None, Field(serialization_alias="model")] = None + temperature: float | None = None + top_p: float | None = None + n: int | None = None + stream: bool = False + stop: str | list[str] | None = None + max_tokens: int | None = None + presence_penalty: float | None = None + frequency_penalty: float | None = None + logit_bias: dict[str, float] | None = None + user: str | None = None + tools: list[dict[str, Any]] | None = None + tool_choice: str | dict[str, Any] | None = None + response_format: dict[str, str] | None = None + thinking_mode: bool | None = None + seed: int | None = None + extra_headers: dict | None = None + extra_body: dict | None = None + timeout: float | None = None + + def prepare_settings_dict(self, **kwargs) -> dict[str, Any]: + """Override only for embeddings to exclude input_type and truncate.""" + return self.model_dump( + exclude={"service_id", "extension_data", "structured_json_response", "thinking_mode"}, + exclude_none=True, + by_alias=True, + ) \ No newline at end of file diff --git a/python/semantic_kernel/connectors/ai/nvidia/services/nvidia_chat_completion.py b/python/semantic_kernel/connectors/ai/nvidia/services/nvidia_chat_completion.py new file mode 100644 index 000000000000..292292b6688b --- /dev/null +++ b/python/semantic_kernel/connectors/ai/nvidia/services/nvidia_chat_completion.py @@ -0,0 +1,284 @@ +# Copyright (c) Microsoft. All rights reserved. 
+ +import logging +import sys +from collections.abc import AsyncGenerator +from typing import Any, TYPE_CHECKING + +if sys.version_info >= (3, 12): + from typing import override # pragma: no cover +else: + from typing_extensions import override # pragma: no cover + +from openai import AsyncOpenAI +from pydantic import ValidationError + +from openai.types.chat.chat_completion import ChatCompletion, Choice +from openai.types.chat.chat_completion_chunk import ChatCompletionChunk, Choice as ChunkChoice + +from semantic_kernel.contents import ( + AuthorRole, + ChatMessageContent, + FinishReason, + FunctionCallContent, + StreamingChatMessageContent, + StreamingTextContent, + TextContent, +) +from semantic_kernel.contents.chat_history import ChatHistory +from semantic_kernel.connectors.ai.completion_usage import CompletionUsage +from semantic_kernel.connectors.ai.nvidia.prompt_execution_settings.nvidia_prompt_execution_settings import ( + NvidiaChatPromptExecutionSettings, +) +from semantic_kernel.connectors.ai.prompt_execution_settings import PromptExecutionSettings +from semantic_kernel.utils.telemetry.model_diagnostics.decorators import ( + trace_chat_completion, + trace_streaming_chat_completion, +) + +if TYPE_CHECKING: + from semantic_kernel.connectors.ai.function_call_choice_configuration import FunctionCallChoiceConfiguration + +from semantic_kernel.connectors.ai.chat_completion_client_base import ChatCompletionClientBase +from semantic_kernel.connectors.ai.nvidia.services.nvidia_handler import NvidiaHandler +from semantic_kernel.connectors.ai.nvidia.services.nvidia_model_types import NvidiaModelTypes +from semantic_kernel.connectors.ai.nvidia.settings.nvidia_settings import NvidiaSettings +from semantic_kernel.exceptions.service_exceptions import ServiceInitializationError +from semantic_kernel.utils.feature_stage_decorator import experimental + +logger: logging.Logger = logging.getLogger(__name__) + + +@experimental +class NvidiaChatCompletion(NvidiaHandler, ChatCompletionClientBase): + """NVIDIA Chat completion class.""" + + def __init__( + self, + ai_model_id: str | None = None, + api_key: str | None = None, + base_url: str | None = None, + service_id: str | None = None, + client: AsyncOpenAI | None = None, + env_file_path: str | None = None, + env_file_encoding: str | None = None, + instruction_role: str | None = None, + ) -> None: + """Initialize an NvidiaChatCompletion service. + + Args: + ai_model_id (str): NVIDIA model name, see + https://docs.api.nvidia.com/nim/reference/ + service_id (str | None): Service ID tied to the execution settings. + api_key (str | None): The optional API key to use. If provided will override, + the env vars or .env file value. + base_url (str | None): Custom API endpoint. (Optional) + client (Optional[AsyncOpenAI]): An existing client to use. (Optional) + env_file_path (str | None): Use the environment settings file as a fallback + to environment variables. (Optional) + env_file_encoding (str | None): The encoding of the environment settings file. (Optional) + instruction_role (str | None): The role to use for 'instruction' messages. 
(Optional) + """ + try: + nvidia_settings = NvidiaSettings( + api_key=api_key, + base_url=base_url, + chat_model_id=ai_model_id, + env_file_path=env_file_path, + env_file_encoding=env_file_encoding, + ) + except ValidationError as ex: + raise ServiceInitializationError("Failed to create NVIDIA settings.", ex) from ex + + if not client and not nvidia_settings.api_key: + raise ServiceInitializationError("The NVIDIA API key is required.") + if not nvidia_settings.chat_model_id: + nvidia_settings.chat_model_id = "nvidia/nv-llama3-8b-instruct" + logger.warning(f"Default chat model set as: {nvidia_settings.chat_model_id}") + + # Create client if not provided + if not client: + client = AsyncOpenAI( + api_key=nvidia_settings.api_key.get_secret_value() if nvidia_settings.api_key else None, + base_url=nvidia_settings.base_url, + ) + + super().__init__( + ai_model_id=nvidia_settings.chat_model_id, + api_key=nvidia_settings.api_key.get_secret_value() if nvidia_settings.api_key else None, + base_url=nvidia_settings.base_url, + service_id=service_id or "", + ai_model_type=NvidiaModelTypes.CHAT, + client=client, + instruction_role=instruction_role or "system", + ) + + @classmethod + def from_dict(cls : type["NvidiaChatCompletion"], settings: dict[str, Any]) -> "NvidiaChatCompletion": + """Initialize an NVIDIA service from a dictionary of settings. + + Args: + settings: A dictionary of settings for the service. + """ + return cls( + ai_model_id=settings.get("ai_model_id"), + api_key=settings.get("api_key"), + base_url=settings.get("base_url"), + service_id=settings.get("service_id"), + env_file_path=settings.get("env_file_path"), + ) + + @override + def get_prompt_execution_settings_class(self) -> type["PromptExecutionSettings"]: + return NvidiaChatPromptExecutionSettings + + @override + @trace_chat_completion("nvidia") + async def _inner_get_chat_message_contents( + self, + chat_history: "ChatHistory", + settings: "PromptExecutionSettings", + ) -> list["ChatMessageContent"]: + if not isinstance(settings, NvidiaChatPromptExecutionSettings): + settings = self.get_prompt_execution_settings_from_settings(settings) + assert isinstance(settings, NvidiaChatPromptExecutionSettings) # nosec + + settings.stream = False + settings.messages = self._prepare_chat_history_for_request(chat_history) + settings.ai_model_id = settings.ai_model_id or self.ai_model_id + + response = await self._send_request(settings) + assert isinstance(response, ChatCompletion) # nosec + response_metadata = self._get_metadata_from_chat_response(response) + return [self._create_chat_message_content(response, choice, response_metadata) for choice in response.choices] + + @override + @trace_streaming_chat_completion("nvidia") + async def _inner_get_streaming_chat_message_contents( + self, + chat_history: "ChatHistory", + settings: "PromptExecutionSettings", + function_invoke_attempt: int = 0, + ) -> AsyncGenerator[list["StreamingChatMessageContent"], Any]: + if not isinstance(settings, NvidiaChatPromptExecutionSettings): + settings = self.get_prompt_execution_settings_from_settings(settings) + assert isinstance(settings, NvidiaChatPromptExecutionSettings) # nosec + + settings.stream = True + settings.messages = self._prepare_chat_history_for_request(chat_history) + settings.ai_model_id = settings.ai_model_id or self.ai_model_id + + response = await self._send_request(settings) + assert isinstance(response, AsyncGenerator) # nosec + + async for chunk in response: + if len(chunk.choices) == 0: + continue + chunk_metadata = 
self._get_metadata_from_chat_response(chunk) + yield [ + self._create_streaming_chat_message_content(chunk, choice, chunk_metadata, function_invoke_attempt) + for choice in chunk.choices + ] + + def _create_chat_message_content( + self, response: ChatCompletion, choice: Choice, response_metadata: dict[str, Any] + ) -> "ChatMessageContent": + """Create a chat message content object from a choice.""" + metadata = self._get_metadata_from_chat_choice(choice) + metadata.update(response_metadata) + + items: list[Any] = self._get_tool_calls_from_chat_choice(choice) + items.extend(self._get_function_call_from_chat_choice(choice)) + if choice.message.content: + items.append(TextContent(text=choice.message.content)) + + return ChatMessageContent( + inner_content=response, + ai_model_id=self.ai_model_id, + metadata=metadata, + role=AuthorRole(choice.message.role), + items=items, + finish_reason=(FinishReason(choice.finish_reason) if choice.finish_reason else None), + ) + + def _create_streaming_chat_message_content( + self, + chunk: ChatCompletionChunk, + choice: ChunkChoice, + chunk_metadata: dict[str, Any], + function_invoke_attempt: int, + ) -> StreamingChatMessageContent: + """Create a streaming chat message content object from a choice.""" + metadata = self._get_metadata_from_chat_choice(choice) + metadata.update(chunk_metadata) + + items: list[Any] = self._get_tool_calls_from_chat_choice(choice) + items.extend(self._get_function_call_from_chat_choice(choice)) + if choice.delta and choice.delta.content is not None: + items.append(StreamingTextContent(choice_index=choice.index, text=choice.delta.content)) + return StreamingChatMessageContent( + choice_index=choice.index, + inner_content=chunk, + ai_model_id=self.ai_model_id, + metadata=metadata, + role=(AuthorRole(choice.delta.role) if choice.delta and choice.delta.role else AuthorRole.ASSISTANT), + finish_reason=(FinishReason(choice.finish_reason) if choice.finish_reason else None), + items=items, + function_invoke_attempt=function_invoke_attempt, + ) + + def _get_metadata_from_chat_response(self, response: ChatCompletion | ChatCompletionChunk) -> dict[str, Any]: + """Get metadata from a chat response.""" + return { + "id": response.id, + "created": response.created, + "system_fingerprint": getattr(response, "system_fingerprint", None), + "usage": CompletionUsage.from_openai(response.usage) if response.usage is not None else None, + } + + def _get_metadata_from_chat_choice(self, choice: Choice | ChunkChoice) -> dict[str, Any]: + """Get metadata from a chat choice.""" + return { + "logprobs": getattr(choice, "logprobs", None), + } + + def _get_tool_calls_from_chat_choice(self, choice: Choice | ChunkChoice) -> list[FunctionCallContent]: + """Get tool calls from a chat choice.""" + content = choice.message if isinstance(choice, Choice) else choice.delta + if content and (tool_calls := getattr(content, "tool_calls", None)) is not None: + return [ + FunctionCallContent( + id=tool.id, + index=getattr(tool, "index", None), + name=tool.function.name, + arguments=tool.function.arguments, + ) + for tool in tool_calls + ] + return [] + + def _get_function_call_from_chat_choice(self, choice: Choice | ChunkChoice) -> list[FunctionCallContent]: + """Get function calls from a chat choice.""" + content = choice.message if isinstance(choice, Choice) else choice.delta + if content and (function_call := getattr(content, "function_call", None)) is not None: + return [ + FunctionCallContent( + id="", + name=function_call.name, + arguments=function_call.arguments, + 
) + ] + return [] + + def _prepare_chat_history_for_request( + self, + chat_history: ChatHistory, + role_key: str = "role", + content_key: str = "content", + ) -> list[dict[str, str]]: + """Prepare chat history for request.""" + messages = [] + for message in chat_history.messages: + message_dict = {role_key: message.role.value, content_key: message.content} + messages.append(message_dict) + return messages \ No newline at end of file diff --git a/python/semantic_kernel/connectors/ai/nvidia/services/nvidia_handler.py b/python/semantic_kernel/connectors/ai/nvidia/services/nvidia_handler.py index 4aed491c9e52..0bd4800dee12 100644 --- a/python/semantic_kernel/connectors/ai/nvidia/services/nvidia_handler.py +++ b/python/semantic_kernel/connectors/ai/nvidia/services/nvidia_handler.py @@ -5,7 +5,10 @@ from typing import Any, ClassVar, Union from openai import AsyncOpenAI, AsyncStream -from openai.types import CreateEmbeddingResponse +from openai.types.chat.chat_completion import ChatCompletion +from openai.types.completion import Completion +from openai.types.create_embedding_response import CreateEmbeddingResponse +from openai.types.chat.chat_completion_chunk import ChatCompletionChunk from semantic_kernel.connectors.ai.nvidia import ( NvidiaPromptExecutionSettings, @@ -18,7 +21,7 @@ logger: logging.Logger = logging.getLogger(__name__) -RESPONSE_TYPE = Union[list[Any],] +RESPONSE_TYPE = Union[list[Any], ChatCompletion, Completion, AsyncStream[Any]] class NvidiaHandler(KernelBaseModel, ABC): @@ -26,18 +29,23 @@ class NvidiaHandler(KernelBaseModel, ABC): MODEL_PROVIDER_NAME: ClassVar[str] = "nvidia" client: AsyncOpenAI - ai_model_type: NvidiaModelTypes = ( - NvidiaModelTypes.EMBEDDING - ) # TODO: revert this to chat after adding support for chat-compl # noqa: TD002 - prompt_tokens: int = 0 + ai_model_type: NvidiaModelTypes = NvidiaModelTypes.CHAT completion_tokens: int = 0 total_tokens: int = 0 + prompt_tokens: int = 0 async def _send_request(self, settings: PromptExecutionSettings) -> RESPONSE_TYPE: """Send a request to the Nvidia API.""" if self.ai_model_type == NvidiaModelTypes.EMBEDDING: assert isinstance(settings, NvidiaPromptExecutionSettings) # nosec return await self._send_embedding_request(settings) + elif self.ai_model_type == NvidiaModelTypes.CHAT: + assert isinstance(settings, NvidiaPromptExecutionSettings) # nosec + return await self._send_chat_completion_request(settings) + elif self.ai_model_type == NvidiaModelTypes.TEXT: + assert isinstance(settings, NvidiaPromptExecutionSettings) # nosec + return await self._send_text_completion_request(settings) + raise NotImplementedError(f"Model type {self.ai_model_type} is not supported") @@ -55,9 +63,38 @@ async def _send_embedding_request(self, settings: NvidiaPromptExecutionSettings) ex, ) from ex + async def _send_chat_completion_request(self, settings: NvidiaPromptExecutionSettings) -> ChatCompletion | AsyncStream[Any]: + """Send a request to the NVIDIA chat completion endpoint.""" + try: + response = await self.client.chat.completions.create(**settings.prepare_settings_dict()) + self.store_usage(response) + return response + except Exception as ex: + raise ServiceResponseException( + f"{type(self)} service failed to complete the chat", + ex, + ) from ex + + async def _send_text_completion_request(self, settings: NvidiaPromptExecutionSettings) -> Completion | AsyncStream[Any]: + """Send a request to the NVIDIA text completion endpoint.""" + try: + settings_dict = settings.prepare_settings_dict() + response = await 
self.client.completions.create(**settings_dict) + self.store_usage(response) + return response + except Exception as ex: + raise ServiceResponseException( + f"{type(self)} service failed to complete the text", + ex, + ) from ex + def store_usage( self, - response: CreateEmbeddingResponse, + response: ChatCompletion + | Completion + | AsyncStream[ChatCompletionChunk] + | AsyncStream[Completion] + | CreateEmbeddingResponse, ): """Store the usage information from the response.""" if not isinstance(response, AsyncStream) and response.usage: diff --git a/python/semantic_kernel/connectors/ai/nvidia/services/nvidia_model_types.py b/python/semantic_kernel/connectors/ai/nvidia/services/nvidia_model_types.py index 4e3e12c6b71b..3582a385c951 100644 --- a/python/semantic_kernel/connectors/ai/nvidia/services/nvidia_model_types.py +++ b/python/semantic_kernel/connectors/ai/nvidia/services/nvidia_model_types.py @@ -4,6 +4,8 @@ class NvidiaModelTypes(Enum): - """Nvidia model types, can be text, chat or embedding.""" + """Nvidia model types, can be text, chat, or embedding.""" EMBEDDING = "embedding" + CHAT = "chat" + TEXT = "text" diff --git a/python/semantic_kernel/connectors/ai/nvidia/services/nvidia_text_embedding.py b/python/semantic_kernel/connectors/ai/nvidia/services/nvidia_text_embedding.py index 1d828963d896..dddf32b00971 100644 --- a/python/semantic_kernel/connectors/ai/nvidia/services/nvidia_text_embedding.py +++ b/python/semantic_kernel/connectors/ai/nvidia/services/nvidia_text_embedding.py @@ -15,7 +15,7 @@ from openai import AsyncOpenAI from pydantic import ValidationError -from semantic_kernel.connectors.ai.embeddings.embedding_generator_base import EmbeddingGeneratorBase +from semantic_kernel.connectors.ai.embedding_generator_base import EmbeddingGeneratorBase from semantic_kernel.connectors.ai.nvidia.prompt_execution_settings.nvidia_prompt_execution_settings import ( NvidiaEmbeddingPromptExecutionSettings, ) diff --git a/python/semantic_kernel/connectors/ai/nvidia/settings/nvidia_settings.py b/python/semantic_kernel/connectors/ai/nvidia/settings/nvidia_settings.py index fb132df95ab1..4fdae0c9144d 100644 --- a/python/semantic_kernel/connectors/ai/nvidia/settings/nvidia_settings.py +++ b/python/semantic_kernel/connectors/ai/nvidia/settings/nvidia_settings.py @@ -29,6 +29,7 @@ class NvidiaSettings(KernelBaseSettings): env_prefix: ClassVar[str] = "NVIDIA_" - api_key: SecretStr + api_key: SecretStr base_url: str = "https://integrate.api.nvidia.com/v1" - embedding_model_id: str | None + embedding_model_id: str | None = None + chat_model_id: str | None = None From 557ab76effe8d7a9a75107ba9551934d86b15d4f Mon Sep 17 00:00:00 2001 From: Soumili Nandi Date: Wed, 30 Jul 2025 10:19:02 -0700 Subject: [PATCH 02/11] minor fixes --- .../connectors/ai/nvidia/services/nvidia_chat_completion.py | 2 +- .../connectors/ai/nvidia/settings/nvidia_settings.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/python/semantic_kernel/connectors/ai/nvidia/services/nvidia_chat_completion.py b/python/semantic_kernel/connectors/ai/nvidia/services/nvidia_chat_completion.py index 292292b6688b..41db8bd97192 100644 --- a/python/semantic_kernel/connectors/ai/nvidia/services/nvidia_chat_completion.py +++ b/python/semantic_kernel/connectors/ai/nvidia/services/nvidia_chat_completion.py @@ -93,7 +93,7 @@ def __init__( if not client and not nvidia_settings.api_key: raise ServiceInitializationError("The NVIDIA API key is required.") if not nvidia_settings.chat_model_id: - nvidia_settings.chat_model_id = 
"nvidia/nv-llama3-8b-instruct" + nvidia_settings.chat_model_id = "meta/llama3-8b-instruct" logger.warning(f"Default chat model set as: {nvidia_settings.chat_model_id}") # Create client if not provided diff --git a/python/semantic_kernel/connectors/ai/nvidia/settings/nvidia_settings.py b/python/semantic_kernel/connectors/ai/nvidia/settings/nvidia_settings.py index 4fdae0c9144d..68616954823e 100644 --- a/python/semantic_kernel/connectors/ai/nvidia/settings/nvidia_settings.py +++ b/python/semantic_kernel/connectors/ai/nvidia/settings/nvidia_settings.py @@ -29,7 +29,7 @@ class NvidiaSettings(KernelBaseSettings): env_prefix: ClassVar[str] = "NVIDIA_" - api_key: SecretStr + api_key: SecretStr | None = None base_url: str = "https://integrate.api.nvidia.com/v1" embedding_model_id: str | None = None chat_model_id: str | None = None From bac376ce39ee5c612aa567de82369d520b22b766 Mon Sep 17 00:00:00 2001 From: Soumili Nandi Date: Wed, 30 Jul 2025 10:55:57 -0700 Subject: [PATCH 03/11] added support for structured output --- .../nvidia_prompt_execution_settings.py | 10 ++++--- .../nvidia/services/nvidia_chat_completion.py | 27 ++++++++++++++++++- .../ai/nvidia/services/nvidia_handler.py | 27 +++++++------------ .../ai/nvidia/services/nvidia_model_types.py | 3 +-- 4 files changed, 43 insertions(+), 24 deletions(-) diff --git a/python/semantic_kernel/connectors/ai/nvidia/prompt_execution_settings/nvidia_prompt_execution_settings.py b/python/semantic_kernel/connectors/ai/nvidia/prompt_execution_settings/nvidia_prompt_execution_settings.py index 159473e7e277..d35d9a1b6976 100644 --- a/python/semantic_kernel/connectors/ai/nvidia/prompt_execution_settings/nvidia_prompt_execution_settings.py +++ b/python/semantic_kernel/connectors/ai/nvidia/prompt_execution_settings/nvidia_prompt_execution_settings.py @@ -2,7 +2,7 @@ from typing import Annotated, Any, Literal -from pydantic import Field +from pydantic import Field, BaseModel from semantic_kernel.connectors.ai.prompt_execution_settings import PromptExecutionSettings @@ -54,17 +54,21 @@ class NvidiaChatPromptExecutionSettings(NvidiaPromptExecutionSettings): user: str | None = None tools: list[dict[str, Any]] | None = None tool_choice: str | dict[str, Any] | None = None - response_format: dict[str, str] | None = None + response_format: ( + dict[Literal["type"], Literal["text", "json_object"]] | dict[str, Any] | type[BaseModel] | type | None + ) = None thinking_mode: bool | None = None seed: int | None = None extra_headers: dict | None = None extra_body: dict | None = None timeout: float | None = None + # NVIDIA-specific structured output support + nvext: dict[str, Any] | None = None def prepare_settings_dict(self, **kwargs) -> dict[str, Any]: """Override only for embeddings to exclude input_type and truncate.""" return self.model_dump( - exclude={"service_id", "extension_data", "structured_json_response", "thinking_mode"}, + exclude={"service_id", "extension_data", "structured_json_response", "thinking_mode", "response_format"}, exclude_none=True, by_alias=True, ) \ No newline at end of file diff --git a/python/semantic_kernel/connectors/ai/nvidia/services/nvidia_chat_completion.py b/python/semantic_kernel/connectors/ai/nvidia/services/nvidia_chat_completion.py index 41db8bd97192..3e0544b1fbd9 100644 --- a/python/semantic_kernel/connectors/ai/nvidia/services/nvidia_chat_completion.py +++ b/python/semantic_kernel/connectors/ai/nvidia/services/nvidia_chat_completion.py @@ -93,7 +93,7 @@ def __init__( if not client and not nvidia_settings.api_key: raise 
ServiceInitializationError("The NVIDIA API key is required.") if not nvidia_settings.chat_model_id: - nvidia_settings.chat_model_id = "meta/llama3-8b-instruct" + nvidia_settings.chat_model_id = "meta/llama-3.1-8b-instruct" logger.warning(f"Default chat model set as: {nvidia_settings.chat_model_id}") # Create client if not provided @@ -147,6 +147,9 @@ async def _inner_get_chat_message_contents( settings.messages = self._prepare_chat_history_for_request(chat_history) settings.ai_model_id = settings.ai_model_id or self.ai_model_id + # Handle structured output + self._handle_structured_output(settings) + response = await self._send_request(settings) assert isinstance(response, ChatCompletion) # nosec response_metadata = self._get_metadata_from_chat_response(response) @@ -168,6 +171,9 @@ async def _inner_get_streaming_chat_message_contents( settings.messages = self._prepare_chat_history_for_request(chat_history) settings.ai_model_id = settings.ai_model_id or self.ai_model_id + # Handle structured output + self._handle_structured_output(settings) + response = await self._send_request(settings) assert isinstance(response, AsyncGenerator) # nosec @@ -270,6 +276,25 @@ def _get_function_call_from_chat_choice(self, choice: Choice | ChunkChoice) -> l ] return [] + def _handle_structured_output( + self, request_settings: NvidiaChatPromptExecutionSettings + ) -> None: + """Handle structured output for NVIDIA models using nvext parameter.""" + response_format = getattr(request_settings, "response_format", None) + if response_format: + # Convert Pydantic model to JSON schema for NVIDIA's guided_json + if hasattr(response_format, "model_json_schema"): + # It's a Pydantic model + schema = response_format.model_json_schema() + if not request_settings.extra_body: + request_settings.extra_body = {} + request_settings.extra_body["nvext"] = {"guided_json": schema} + elif isinstance(response_format, dict): + # It's already a dict, use it directly + if not request_settings.extra_body: + request_settings.extra_body = {} + request_settings.extra_body["nvext"] = {"guided_json": response_format} + def _prepare_chat_history_for_request( self, chat_history: ChatHistory, diff --git a/python/semantic_kernel/connectors/ai/nvidia/services/nvidia_handler.py b/python/semantic_kernel/connectors/ai/nvidia/services/nvidia_handler.py index 0bd4800dee12..88e5882f973a 100644 --- a/python/semantic_kernel/connectors/ai/nvidia/services/nvidia_handler.py +++ b/python/semantic_kernel/connectors/ai/nvidia/services/nvidia_handler.py @@ -42,10 +42,6 @@ async def _send_request(self, settings: PromptExecutionSettings) -> RESPONSE_TYP elif self.ai_model_type == NvidiaModelTypes.CHAT: assert isinstance(settings, NvidiaPromptExecutionSettings) # nosec return await self._send_chat_completion_request(settings) - elif self.ai_model_type == NvidiaModelTypes.TEXT: - assert isinstance(settings, NvidiaPromptExecutionSettings) # nosec - return await self._send_text_completion_request(settings) - raise NotImplementedError(f"Model type {self.ai_model_type} is not supported") @@ -65,26 +61,21 @@ async def _send_embedding_request(self, settings: NvidiaPromptExecutionSettings) async def _send_chat_completion_request(self, settings: NvidiaPromptExecutionSettings) -> ChatCompletion | AsyncStream[Any]: """Send a request to the NVIDIA chat completion endpoint.""" - try: - response = await self.client.chat.completions.create(**settings.prepare_settings_dict()) - self.store_usage(response) - return response - except Exception as ex: - raise 
ServiceResponseException(
-                f"{type(self)} service failed to complete the chat",
-                ex,
-            ) from ex
-
-    async def _send_text_completion_request(self, settings: NvidiaPromptExecutionSettings) -> Completion | AsyncStream[Any]:
-        """Send a request to the NVIDIA text completion endpoint."""
         try:
             settings_dict = settings.prepare_settings_dict()
-            response = await self.client.completions.create(**settings_dict)
+
+            # Handle structured output if nvext is present in extra_body
+            if settings.extra_body and "nvext" in settings.extra_body:
+                if "extra_body" not in settings_dict:
+                    settings_dict["extra_body"] = {}
+                settings_dict["extra_body"]["nvext"] = settings.extra_body["nvext"]
+
+            response = await self.client.chat.completions.create(**settings_dict)
             self.store_usage(response)
             return response
         except Exception as ex:
             raise ServiceResponseException(
-                f"{type(self)} service failed to complete the text",
+                f"{type(self)} service failed to complete the chat",
                 ex,
             ) from ex
diff --git a/python/semantic_kernel/connectors/ai/nvidia/services/nvidia_model_types.py b/python/semantic_kernel/connectors/ai/nvidia/services/nvidia_model_types.py
index 3582a385c951..7dc37d4f9de3 100644
--- a/python/semantic_kernel/connectors/ai/nvidia/services/nvidia_model_types.py
+++ b/python/semantic_kernel/connectors/ai/nvidia/services/nvidia_model_types.py
@@ -7,5 +7,4 @@ class NvidiaModelTypes(Enum):
     """Nvidia model types, can be text, chat, or embedding."""
 
     EMBEDDING = "embedding"
-    CHAT = "chat"
-    TEXT = "text"
+    CHAT = "chat"
\ No newline at end of file

From d7065957aadbe6ed3986cc52771ac8c00bc7b29c Mon Sep 17 00:00:00 2001
From: Soumili Nandi
Date: Wed, 30 Jul 2025 12:05:34 -0700
Subject: [PATCH 04/11] Add test cases and readme

---
 .../connectors/ai/nvidia/README.md            |  42 ++++-
 .../test_nvidia_prompt_execution_settings.py  | 120 ++++++++++++++
 .../services/test_nvidia_chat_completion.py   | 114 ++++++++++++++
 .../ai/nvidia/services/test_nvidia_handler.py | 148 ++++++++++++++++++
 .../nvidia/settings/test_nvidia_settings.py   |  58 +++++++
 5 files changed, 481 insertions(+), 1 deletion(-)
 create mode 100644 python/tests/unit/connectors/ai/nvidia/prompt_execution_settings/test_nvidia_prompt_execution_settings.py
 create mode 100644 python/tests/unit/connectors/ai/nvidia/services/test_nvidia_chat_completion.py
 create mode 100644 python/tests/unit/connectors/ai/nvidia/services/test_nvidia_handler.py
 create mode 100644 python/tests/unit/connectors/ai/nvidia/settings/test_nvidia_settings.py

diff --git a/python/semantic_kernel/connectors/ai/nvidia/README.md b/python/semantic_kernel/connectors/ai/nvidia/README.md
index 989446d06a05..cddf8734934a 100644
--- a/python/semantic_kernel/connectors/ai/nvidia/README.md
+++ b/python/semantic_kernel/connectors/ai/nvidia/README.md
@@ -1,6 +1,6 @@
 # semantic_kernel.connectors.ai.nvidia
 
-This connector enables integration with NVIDIA NIM API for text embeddings. It allows you to use NVIDIA's embedding models within the Semantic Kernel framework.
+This connector enables integration with NVIDIA NIM API for text embeddings and chat completion. It allows you to use NVIDIA's models within the Semantic Kernel framework.
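+
+The connector reads credentials and model IDs from constructor arguments, from
+`NVIDIA_`-prefixed environment variables, or from a `.env` file (see `NvidiaSettings`).
+A minimal `.env` sketch, assuming the default endpoint and the default chat model:
+
+```
+NVIDIA_API_KEY="..."
+NVIDIA_CHAT_MODEL_ID="meta/llama-3.1-8b-instruct"
+```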
## Quick start @@ -13,6 +13,8 @@ kernel = sk.Kernel() ### Add NVIDIA text embedding service You can provide your API key directly or through environment variables ```python +from semantic_kernel.connectors.ai.nvidia import NvidiaTextEmbedding + embedding_service = NvidiaTextEmbedding( ai_model_id="nvidia/nv-embedqa-e5-v5", # Default model if not specified api_key="your-nvidia-api-key", # Can also use NVIDIA_API_KEY env variable @@ -30,3 +32,41 @@ kernel.add_service(embedding_service) texts = ["Hello, world!", "Semantic Kernel is awesome"] embeddings = await kernel.get_service("nvidia-embeddings").generate_embeddings(texts) ``` + +### Add NVIDIA chat completion service +```python +from semantic_kernel.connectors.ai.nvidia import NvidiaChatCompletion + +chat_service = NvidiaChatCompletion( + ai_model_id="meta/llama-3.1-8b-instruct", # Default model if not specified + api_key="your-nvidia-api-key", # Can also use NVIDIA_API_KEY env variable + service_id="nvidia-chat" # Optional service identifier +) +kernel.add_service(chat_service) +``` + +### Basic chat completion +```python +from semantic_kernel.contents import ChatHistory +from semantic_kernel.contents.utils.author_role import AuthorRole + +chat_history = ChatHistory() +chat_history.add_message(AuthorRole.USER, "Hello, how are you?") +response = await kernel.get_service("nvidia-chat").get_chat_message_content(chat_history) +print(response.content) +``` + +### Using with Chat Completion Agent +```python +from semantic_kernel.agents import ChatCompletionAgent +from semantic_kernel.connectors.ai.nvidia import NvidiaChatCompletion + +agent = ChatCompletionAgent( + service=NvidiaChatCompletion(), + name="SK-Assistant", + instructions="You are a helpful assistant.", +) +response = await agent.get_response(messages="Write a haiku about Semantic Kernel.") +print(response.content) +``` + diff --git a/python/tests/unit/connectors/ai/nvidia/prompt_execution_settings/test_nvidia_prompt_execution_settings.py b/python/tests/unit/connectors/ai/nvidia/prompt_execution_settings/test_nvidia_prompt_execution_settings.py new file mode 100644 index 000000000000..291bb8a20a12 --- /dev/null +++ b/python/tests/unit/connectors/ai/nvidia/prompt_execution_settings/test_nvidia_prompt_execution_settings.py @@ -0,0 +1,120 @@ +# Copyright (c) Microsoft. All rights reserved. 
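+#
+# Unit tests for the NVIDIA prompt execution settings classes; these are pure
+# pydantic validation tests and require no network access.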
+ +import pytest +from pydantic import BaseModel, ValidationError + +from semantic_kernel.connectors.ai.nvidia.prompt_execution_settings.nvidia_prompt_execution_settings import ( + NvidiaPromptExecutionSettings, + NvidiaChatPromptExecutionSettings, + NvidiaEmbeddingPromptExecutionSettings, +) + + +class TestNvidiaPromptExecutionSettings: + """Test cases for NvidiaPromptExecutionSettings.""" + + def test_init_with_defaults(self): + """Test initialization with default values.""" + settings = NvidiaPromptExecutionSettings() + assert settings.ai_model_id is None + assert settings.temperature is None + assert settings.stream is False + + def test_init_with_values(self): + """Test initialization with specific values.""" + settings = NvidiaPromptExecutionSettings( + ai_model_id="test-model", + temperature=0.7, + max_tokens=100, + ) + assert settings.ai_model_id == "test-model" + assert settings.temperature == 0.7 + assert settings.max_tokens == 100 + + def test_validation_temperature_range(self): + """Test temperature validation range.""" + # Valid values + settings = NvidiaPromptExecutionSettings(temperature=0.0) + assert settings.temperature == 0.0 + + settings = NvidiaPromptExecutionSettings(temperature=2.0) + assert settings.temperature == 2.0 + + # Invalid values + with pytest.raises(ValidationError): + NvidiaPromptExecutionSettings(temperature=-0.1) + + with pytest.raises(ValidationError): + NvidiaPromptExecutionSettings(temperature=2.1) + + +class TestNvidiaChatPromptExecutionSettings: + """Test cases for NvidiaChatPromptExecutionSettings.""" + + def test_init_with_defaults(self): + """Test initialization with default values.""" + settings = NvidiaChatPromptExecutionSettings() + assert settings.messages is None + assert settings.response_format is None + assert settings.structured_json_response is False + + def test_response_format_with_pydantic_model(self): + """Test response_format with Pydantic model.""" + class TestModel(BaseModel): + name: str + value: int + + settings = NvidiaChatPromptExecutionSettings( + response_format=TestModel + ) + + assert settings.response_format == TestModel + assert settings.structured_json_response is True + + def test_response_format_with_dict(self): + """Test response_format with dictionary.""" + settings = NvidiaChatPromptExecutionSettings( + response_format={"type": "json_object"} + ) + + assert settings.response_format == {"type": "json_object"} + assert settings.structured_json_response is False + + +class TestNvidiaEmbeddingPromptExecutionSettings: + """Test cases for NvidiaEmbeddingPromptExecutionSettings.""" + + def test_init_with_defaults(self): + """Test initialization with default values.""" + settings = NvidiaEmbeddingPromptExecutionSettings() + assert settings.input is None + assert settings.encoding_format == "float" + assert settings.input_type == "query" + assert settings.truncate == "NONE" + + def test_init_with_values(self): + """Test initialization with specific values.""" + settings = NvidiaEmbeddingPromptExecutionSettings( + input=["hello", "world"], + encoding_format="base64", + input_type="passage", + truncate="START", + ) + + assert settings.input == ["hello", "world"] + assert settings.encoding_format == "base64" + assert settings.input_type == "passage" + assert settings.truncate == "START" + + def test_validation_encoding_format(self): + """Test encoding_format validation.""" + # Valid values + settings = NvidiaEmbeddingPromptExecutionSettings(encoding_format="float") + assert settings.encoding_format == "float" + + settings = 
NvidiaEmbeddingPromptExecutionSettings(encoding_format="base64") + assert settings.encoding_format == "base64" + + # Invalid values + with pytest.raises(ValidationError): + NvidiaEmbeddingPromptExecutionSettings(encoding_format="invalid") \ No newline at end of file diff --git a/python/tests/unit/connectors/ai/nvidia/services/test_nvidia_chat_completion.py b/python/tests/unit/connectors/ai/nvidia/services/test_nvidia_chat_completion.py new file mode 100644 index 000000000000..22e563416fee --- /dev/null +++ b/python/tests/unit/connectors/ai/nvidia/services/test_nvidia_chat_completion.py @@ -0,0 +1,114 @@ +# Copyright (c) Microsoft. All rights reserved. + +import pytest +from unittest.mock import AsyncMock, MagicMock, patch +from pydantic import BaseModel + +from semantic_kernel.connectors.ai.nvidia import NvidiaChatCompletion +from semantic_kernel.connectors.ai.nvidia.prompt_execution_settings.nvidia_prompt_execution_settings import ( + NvidiaChatPromptExecutionSettings, +) +from semantic_kernel.contents import ChatHistory +from semantic_kernel.contents.author_role import AuthorRole + + +@pytest.fixture +def mock_openai_client(): + """Create a mock OpenAI client.""" + with patch("semantic_kernel.connectors.ai.nvidia.services.nvidia_chat_completion.AsyncOpenAI") as mock_client: + mock_client.return_value = AsyncMock() + yield mock_client.return_value + + +@pytest.fixture +def nvidia_chat_completion(mock_openai_client): + """Create a NvidiaChatCompletion instance with mocked client.""" + return NvidiaChatCompletion( + ai_model_id="meta/llama-3.1-8b-instruct", + api_key="test-api-key", + ) + + +class TestNvidiaChatCompletion: + """Test cases for NvidiaChatCompletion.""" + + def test_init_with_defaults(self): + """Test initialization with default values.""" + with patch("semantic_kernel.connectors.ai.nvidia.services.nvidia_chat_completion.AsyncOpenAI"): + service = NvidiaChatCompletion(api_key="test-key") + assert service.ai_model_id == "meta/llama-3.1-8b-instruct" + + def test_get_prompt_execution_settings_class(self, nvidia_chat_completion): + """Test getting the prompt execution settings class.""" + from semantic_kernel.connectors.ai.nvidia.prompt_execution_settings.nvidia_prompt_execution_settings import NvidiaChatPromptExecutionSettings + assert nvidia_chat_completion.get_prompt_execution_settings_class() == NvidiaChatPromptExecutionSettings + + @pytest.mark.asyncio + async def test_get_chat_message_contents(self, nvidia_chat_completion, mock_openai_client): + """Test basic chat completion.""" + # Mock the response + mock_response = MagicMock() + mock_response.choices = [ + MagicMock( + message=MagicMock(role="assistant", content="Hello!"), + finish_reason="stop", + ) + ] + mock_response.usage = MagicMock(prompt_tokens=10, completion_tokens=20, total_tokens=30) + mock_openai_client.chat.completions.create.return_value = mock_response + + # Test + chat_history = ChatHistory() + chat_history.add_message(AuthorRole.USER, "Hello") + settings = NvidiaChatPromptExecutionSettings() + + result = await nvidia_chat_completion.get_chat_message_contents(chat_history, settings) + + assert len(result) == 1 + assert result[0].content == "Hello!" 
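+        # The connector maps the mocked OpenAI response into ChatMessageContent
+        # items (see _create_chat_message_content), so the text round-trips unchanged.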
+ + @pytest.mark.asyncio + async def test_structured_output_with_pydantic_model(self, nvidia_chat_completion, mock_openai_client): + """Test structured output with Pydantic model.""" + # Define test model + class TestModel(BaseModel): + name: str + value: int + + # Mock response + mock_response = MagicMock() + mock_response.choices = [ + MagicMock( + message=MagicMock(role="assistant", content='{"name": "test", "value": 42}'), + finish_reason="stop", + ) + ] + mock_response.usage = MagicMock(prompt_tokens=10, completion_tokens=20, total_tokens=30) + mock_openai_client.chat.completions.create.return_value = mock_response + + # Test + chat_history = ChatHistory() + chat_history.add_message(AuthorRole.USER, "Give me structured data") + settings = NvidiaChatPromptExecutionSettings() + settings.response_format = TestModel + + result = await nvidia_chat_completion.get_chat_message_contents(chat_history, settings) + + # Verify nvext was passed + call_args = mock_openai_client.chat.completions.create.call_args[1] + assert "extra_body" in call_args + assert "nvext" in call_args["extra_body"] + assert "guided_json" in call_args["extra_body"]["nvext"] + + @pytest.mark.asyncio + async def test_error_handling(self, nvidia_chat_completion, mock_openai_client): + """Test error handling.""" + mock_openai_client.chat.completions.create.side_effect = Exception("API Error") + + chat_history = ChatHistory() + chat_history.add_message(AuthorRole.USER, "Hello") + settings = NvidiaChatPromptExecutionSettings() + + from semantic_kernel.exceptions import ServiceResponseException + with pytest.raises(ServiceResponseException): + await nvidia_chat_completion.get_chat_message_contents(chat_history, settings) \ No newline at end of file diff --git a/python/tests/unit/connectors/ai/nvidia/services/test_nvidia_handler.py b/python/tests/unit/connectors/ai/nvidia/services/test_nvidia_handler.py new file mode 100644 index 000000000000..ddb3e766eb1a --- /dev/null +++ b/python/tests/unit/connectors/ai/nvidia/services/test_nvidia_handler.py @@ -0,0 +1,148 @@ +# Copyright (c) Microsoft. All rights reserved. 
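+#
+# Unit tests for NvidiaHandler request routing and usage tracking, using a mocked
+# AsyncOpenAI client so no real NVIDIA endpoint is called.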
+ +import pytest +from unittest.mock import AsyncMock, MagicMock, patch +from openai import AsyncOpenAI + +from semantic_kernel.connectors.ai.nvidia.services.nvidia_handler import NvidiaHandler +from semantic_kernel.connectors.ai.nvidia.services.nvidia_model_types import NvidiaModelTypes +from semantic_kernel.connectors.ai.nvidia.prompt_execution_settings.nvidia_prompt_execution_settings import ( + NvidiaChatPromptExecutionSettings, + NvidiaEmbeddingPromptExecutionSettings, +) + + +@pytest.fixture +def mock_openai_client(): + """Create a mock OpenAI client.""" + client = AsyncMock(spec=AsyncOpenAI) + return client + + +@pytest.fixture +def nvidia_handler(mock_openai_client): + """Create a NvidiaHandler instance with mocked client.""" + return NvidiaHandler( + client=mock_openai_client, + ai_model_type=NvidiaModelTypes.CHAT, + ai_model_id="test-model", + api_key="test-key", + ) + + +class TestNvidiaHandler: + """Test cases for NvidiaHandler.""" + + def test_init(self, mock_openai_client): + """Test initialization.""" + handler = NvidiaHandler( + client=mock_openai_client, + ai_model_type=NvidiaModelTypes.CHAT, + ai_model_id="test-model", + api_key="test-key", + ) + + assert handler.client == mock_openai_client + assert handler.ai_model_type == NvidiaModelTypes.CHAT + assert handler.ai_model_id == "test-model" + assert handler.MODEL_PROVIDER_NAME == "nvidia" + + @pytest.mark.asyncio + async def test_send_chat_completion_request(self, nvidia_handler, mock_openai_client): + """Test sending chat completion request.""" + # Mock the response + mock_response = MagicMock() + mock_response.choices = [ + MagicMock( + message=MagicMock(role="assistant", content="Hello!"), + finish_reason="stop", + ) + ] + mock_response.usage = MagicMock(prompt_tokens=10, completion_tokens=20, total_tokens=30) + mock_openai_client.chat.completions.create.return_value = mock_response + + # Create settings + settings = NvidiaChatPromptExecutionSettings( + messages=[{"role": "user", "content": "Hello"}], + model="test-model", + ) + + # Test the method + result = await nvidia_handler._send_chat_completion_request(settings) + assert result == mock_response + + # Verify usage was stored + assert nvidia_handler.prompt_tokens == 10 + assert nvidia_handler.completion_tokens == 20 + assert nvidia_handler.total_tokens == 30 + + @pytest.mark.asyncio + async def test_send_chat_completion_request_with_nvext(self, nvidia_handler, mock_openai_client): + """Test sending chat completion request with nvext parameter.""" + # Mock the response + mock_response = MagicMock() + mock_response.choices = [ + MagicMock( + message=MagicMock(role="assistant", content='{"result": "success"}'), + finish_reason="stop", + ) + ] + mock_response.usage = MagicMock(prompt_tokens=10, completion_tokens=20, total_tokens=30) + mock_openai_client.chat.completions.create.return_value = mock_response + + # Create settings with nvext + settings = NvidiaChatPromptExecutionSettings( + messages=[{"role": "user", "content": "Give me JSON"}], + model="test-model", + extra_body={"nvext": {"guided_json": {"type": "object"}}}, + ) + + # Test the method + result = await nvidia_handler._send_chat_completion_request(settings) + assert result == mock_response + + # Verify the client was called with nvext in extra_body + call_args = mock_openai_client.chat.completions.create.call_args[1] + assert "extra_body" in call_args + assert "nvext" in call_args["extra_body"] + assert call_args["extra_body"]["nvext"] == {"guided_json": {"type": "object"}} + + @pytest.mark.asyncio + async 
def test_send_embedding_request(self, mock_openai_client): + """Test sending embedding request.""" + handler = NvidiaHandler( + client=mock_openai_client, + ai_model_type=NvidiaModelTypes.EMBEDDING, + ai_model_id="test-model", + ) + + # Mock the response + mock_response = MagicMock() + mock_response.data = [ + MagicMock(embedding=[0.1, 0.2, 0.3]), + MagicMock(embedding=[0.4, 0.5, 0.6]), + ] + mock_response.usage = MagicMock(prompt_tokens=10, total_tokens=10) + mock_openai_client.embeddings.create.return_value = mock_response + + # Create settings + settings = NvidiaEmbeddingPromptExecutionSettings( + input=["hello", "world"], + model="test-model", + ) + + # Test the method + result = await handler._send_embedding_request(settings) + assert result == [[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]] + + @pytest.mark.asyncio + async def test_send_request_unsupported_model_type(self, nvidia_handler): + """Test send_request with unsupported model type.""" + nvidia_handler.ai_model_type = "UNSUPPORTED" + settings = NvidiaChatPromptExecutionSettings( + messages=[{"role": "user", "content": "Hello"}], + model="test-model", + ) + + with pytest.raises(NotImplementedError, match="Model type UNSUPPORTED is not supported"): + await nvidia_handler._send_request(settings) \ No newline at end of file diff --git a/python/tests/unit/connectors/ai/nvidia/settings/test_nvidia_settings.py b/python/tests/unit/connectors/ai/nvidia/settings/test_nvidia_settings.py new file mode 100644 index 000000000000..739a8f4e9c15 --- /dev/null +++ b/python/tests/unit/connectors/ai/nvidia/settings/test_nvidia_settings.py @@ -0,0 +1,58 @@ +# Copyright (c) Microsoft. All rights reserved. + +import pytest +from pydantic import ValidationError + +from semantic_kernel.connectors.ai.nvidia.settings.nvidia_settings import NvidiaSettings + + +class TestNvidiaSettings: + """Test cases for NvidiaSettings.""" + + def test_init_with_defaults(self): + """Test initialization with default values.""" + settings = NvidiaSettings() + assert settings.api_key is None + assert settings.base_url == "https://integrate.api.nvidia.com/v1" + assert settings.embedding_model_id is None + assert settings.chat_model_id is None + + def test_init_with_values(self): + """Test initialization with specific values.""" + settings = NvidiaSettings( + api_key="test-api-key", + base_url="https://custom.nvidia.com/v1", + embedding_model_id="test-embedding-model", + chat_model_id="test-chat-model", + ) + + assert settings.api_key.get_secret_value() == "test-api-key" + assert settings.base_url == "https://custom.nvidia.com/v1" + assert settings.embedding_model_id == "test-embedding-model" + assert settings.chat_model_id == "test-chat-model" + + def test_env_prefix(self): + """Test environment variable prefix.""" + assert NvidiaSettings.env_prefix == "NVIDIA_" + + def test_api_key_secret_str(self): + """Test that api_key is properly handled as SecretStr.""" + settings = NvidiaSettings(api_key="secret-key") + + # Should be SecretStr type + assert hasattr(settings.api_key, 'get_secret_value') + assert settings.api_key.get_secret_value() == "secret-key" + + # Should not expose the secret in string representation + str_repr = str(settings) + assert "secret-key" not in str_repr + + def test_environment_variables(self, monkeypatch): + """Test that environment variables override defaults.""" + monkeypatch.setenv("NVIDIA_API_KEY", "env-key") + monkeypatch.setenv("NVIDIA_CHAT_MODEL_ID", "env-chat") + + settings = NvidiaSettings() + + assert settings.api_key.get_secret_value() == "env-key" + 
assert settings.chat_model_id == "env-chat" \ No newline at end of file From 72d0d06de85c6520fc5792420ab0cb54d9f731bc Mon Sep 17 00:00:00 2001 From: Soumili Nandi Date: Tue, 9 Sep 2025 15:53:03 -0700 Subject: [PATCH 05/11] resolving comments --- python/samples/concepts/setup/ALL_SETTINGS.md | 3 +- .../setup/chat_completion_services.py | 26 ++++++++++++++++ .../nvidia/services/nvidia_chat_completion.py | 31 ++++++++++++------- 3 files changed, 47 insertions(+), 13 deletions(-) diff --git a/python/samples/concepts/setup/ALL_SETTINGS.md b/python/samples/concepts/setup/ALL_SETTINGS.md index ea7f911a5bb1..d185d1b92cf5 100644 --- a/python/samples/concepts/setup/ALL_SETTINGS.md +++ b/python/samples/concepts/setup/ALL_SETTINGS.md @@ -30,7 +30,8 @@ | | [VertexAITextEmbedding](../../../semantic_kernel/connectors/ai/google/google_ai/services/google_ai_text_embedding.py) | project_id,
region,
embedding_model_id | VERTEX_AI_PROJECT_ID,
VERTEX_AI_REGION,
VERTEX_AI_EMBEDDING_MODEL_ID | Yes,
No,
Yes | | | HuggingFace | [HuggingFaceTextCompletion](../../../semantic_kernel/connectors/ai/hugging_face/services/hf_text_completion.py) | ai_model_id | N/A | Yes | | | | [HuggingFaceTextEmbedding](../../../semantic_kernel/connectors/ai/hugging_face/services/hf_text_embedding.py) | ai_model_id | N/A | Yes | | -| NVIDIA NIM | [NvidiaTextEmbedding](../../../semantic_kernel/connectors/ai/nvidia/services/nvidia_text_embedding.py) | ai_model_id,
api_key,
base_url | NVIDIA_API_KEY,
NVIDIA_TEXT_EMBEDDING_MODEL_ID,
NVIDIA_BASE_URL | Yes | [NvidiaAISettings](../../../semantic_kernel/connectors/ai/nvidia/settings/nvidia_settings.py) | +| NVIDIA NIM | [NvidiaChatCompletion](../../../semantic_kernel/connectors/ai/nvidia/services/nvidia_chat_completion.py) | ai_model_id,
api_key,
base_url | NVIDIA_CHAT_MODEL_ID,
NVIDIA_API_KEY,
NVIDIA_BASE_URL | Yes,
Yes,
No | [NvidiaAISettings](../../../semantic_kernel/connectors/ai/nvidia/settings/nvidia_settings.py) | +| | [NvidiaTextEmbedding](../../../semantic_kernel/connectors/ai/nvidia/services/nvidia_text_embedding.py) | ai_model_id,
api_key,
base_url | NVIDIA_API_KEY,
NVIDIA_TEXT_EMBEDDING_MODEL_ID,
NVIDIA_BASE_URL | Yes | [NvidiaAISettings](../../../semantic_kernel/connectors/ai/nvidia/settings/nvidia_settings.py) | | Mistral AI | [MistralAIChatCompletion](../../../semantic_kernel/connectors/ai/mistral_ai/services/mistral_ai_chat_completion.py) | ai_model_id,
api_key | MISTRALAI_CHAT_MODEL_ID,
MISTRALAI_API_KEY | Yes,
Yes | [MistralAISettings](../../../semantic_kernel/connectors/ai/mistral_ai/settings/mistral_ai_settings.py) | | | [MistralAITextEmbedding](../../../semantic_kernel/connectors/ai/mistral_ai/services/mistral_ai_text_embedding.py) | ai_model_id,
api_key | MISTRALAI_EMBEDDING_MODEL_ID,
MISTRALAI_API_KEY | Yes,
Yes | | | Ollama | [OllamaChatCompletion](../../../semantic_kernel/connectors/ai/ollama/services/ollama_chat_completion.py) | ai_model_id,
host | OLLAMA_CHAT_MODEL_ID,
OLLAMA_HOST | Yes,
No | [OllamaSettings](../../../semantic_kernel/connectors/ai/ollama/ollama_settings.py) | diff --git a/python/samples/concepts/setup/chat_completion_services.py b/python/samples/concepts/setup/chat_completion_services.py index 4baad81ea968..b8f7fba86893 100644 --- a/python/samples/concepts/setup/chat_completion_services.py +++ b/python/samples/concepts/setup/chat_completion_services.py @@ -24,6 +24,7 @@ class Services(str, Enum): BEDROCK = "bedrock" GOOGLE_AI = "google_ai" MISTRAL_AI = "mistral_ai" + NVIDIA = "nvidia" OLLAMA = "ollama" ONNX = "onnx" VERTEX_AI = "vertex_ai" @@ -60,6 +61,7 @@ def get_chat_completion_service_and_request_settings( Services.BEDROCK: lambda: get_bedrock_chat_completion_service_and_request_settings(), Services.GOOGLE_AI: lambda: get_google_ai_chat_completion_service_and_request_settings(), Services.MISTRAL_AI: lambda: get_mistral_ai_chat_completion_service_and_request_settings(), + Services.NVIDIA: lambda: get_nvidia_chat_completion_service_and_request_settings(), Services.OLLAMA: lambda: get_ollama_chat_completion_service_and_request_settings(), Services.ONNX: lambda: get_onnx_chat_completion_service_and_request_settings(), Services.VERTEX_AI: lambda: get_vertex_ai_chat_completion_service_and_request_settings(), @@ -282,6 +284,30 @@ def get_mistral_ai_chat_completion_service_and_request_settings() -> tuple[ return chat_service, request_settings +def get_nvidia_chat_completion_service_and_request_settings() -> tuple[ + "ChatCompletionClientBase", "PromptExecutionSettings" +]: + """Return NVIDIA chat completion service and request settings. + + The service credentials can be read by 3 ways: + 1. Via the constructor + 2. Via the environment variables + 3. Via an environment file + + The request settings control the behavior of the service. The default settings are sufficient to get started. + However, you can adjust the settings to suit your needs. + Note: Some of the settings are NOT meant to be set by the user. 
+ Please refer to the Semantic Kernel Python documentation for more information: + https://learn.microsoft.com/en-us/python/api/semantic-kernel/semantic_kernel?view=semantic-kernel-python + """ + from semantic_kernel.connectors.ai.nvidia import NvidiaChatCompletion, NvidiaChatPromptExecutionSettings + + chat_service = NvidiaChatCompletion(service_id=service_id) + request_settings = NvidiaChatPromptExecutionSettings(service_id=service_id) + + return chat_service, request_settings + + def get_ollama_chat_completion_service_and_request_settings() -> tuple[ "ChatCompletionClientBase", "PromptExecutionSettings" ]: diff --git a/python/semantic_kernel/connectors/ai/nvidia/services/nvidia_chat_completion.py b/python/semantic_kernel/connectors/ai/nvidia/services/nvidia_chat_completion.py index 3e0544b1fbd9..302db1de3e32 100644 --- a/python/semantic_kernel/connectors/ai/nvidia/services/nvidia_chat_completion.py +++ b/python/semantic_kernel/connectors/ai/nvidia/services/nvidia_chat_completion.py @@ -3,12 +3,7 @@ import logging import sys from collections.abc import AsyncGenerator -from typing import Any, TYPE_CHECKING - -if sys.version_info >= (3, 12): - from typing import override # pragma: no cover -else: - from typing_extensions import override # pragma: no cover +from typing import Any, Literal, TYPE_CHECKING from openai import AsyncOpenAI from pydantic import ValidationError @@ -36,9 +31,6 @@ trace_streaming_chat_completion, ) -if TYPE_CHECKING: - from semantic_kernel.connectors.ai.function_call_choice_configuration import FunctionCallChoiceConfiguration - from semantic_kernel.connectors.ai.chat_completion_client_base import ChatCompletionClientBase from semantic_kernel.connectors.ai.nvidia.services.nvidia_handler import NvidiaHandler from semantic_kernel.connectors.ai.nvidia.services.nvidia_model_types import NvidiaModelTypes @@ -46,12 +38,24 @@ from semantic_kernel.exceptions.service_exceptions import ServiceInitializationError from semantic_kernel.utils.feature_stage_decorator import experimental +if sys.version_info >= (3, 12): + from typing import override # pragma: no cover +else: + from typing_extensions import override # pragma: no cover + +if TYPE_CHECKING: + from semantic_kernel.connectors.ai.function_call_choice_configuration import FunctionCallChoiceConfiguration + logger: logging.Logger = logging.getLogger(__name__) @experimental class NvidiaChatCompletion(NvidiaHandler, ChatCompletionClientBase): - """NVIDIA Chat completion class.""" + """NVIDIA Chat completion class. + + This class does not support function calling. The SUPPORTS_FUNCTION_CALLING attribute + is set to False (inherited from the base class). + """ def __init__( self, @@ -62,13 +66,14 @@ def __init__( client: AsyncOpenAI | None = None, env_file_path: str | None = None, env_file_encoding: str | None = None, - instruction_role: str | None = None, + instruction_role: Literal["system", "user", "assistant"] | None = None, ) -> None: """Initialize an NvidiaChatCompletion service. Args: ai_model_id (str): NVIDIA model name, see https://docs.api.nvidia.com/nim/reference/ + If not provided, defaults to "meta/llama-3.1-8b-instruct". service_id (str | None): Service ID tied to the execution settings. api_key (str | None): The optional API key to use. If provided will override, the env vars or .env file value. @@ -77,7 +82,8 @@ def __init__( env_file_path (str | None): Use the environment settings file as a fallback to environment variables. (Optional) env_file_encoding (str | None): The encoding of the environment settings file. 
(Optional) - instruction_role (str | None): The role to use for 'instruction' messages. (Optional) + instruction_role (Literal["system", "user", "assistant"] | None): The role to use for + 'instruction' messages. Defaults to "system". (Optional) """ try: nvidia_settings = NvidiaSettings( @@ -93,6 +99,7 @@ def __init__( if not client and not nvidia_settings.api_key: raise ServiceInitializationError("The NVIDIA API key is required.") if not nvidia_settings.chat_model_id: + # Default fallback model: meta/llama-3.1-8b-instruct nvidia_settings.chat_model_id = "meta/llama-3.1-8b-instruct" logger.warning(f"Default chat model set as: {nvidia_settings.chat_model_id}") From ba67b499083ea2354ecc35df698563ff5c6f27aa Mon Sep 17 00:00:00 2001 From: Soumili Nandi Date: Wed, 10 Sep 2025 10:38:57 -0700 Subject: [PATCH 06/11] update tests and reformats --- .../setup/chat_completion_services.py | 52 +++--- .../connectors/ai/nvidia/README.md | 8 +- .../nvidia_prompt_execution_settings.py | 7 +- .../nvidia/services/nvidia_chat_completion.py | 49 +++--- .../ai/nvidia/services/nvidia_handler.py | 12 +- .../ai/nvidia/services/nvidia_model_types.py | 2 +- .../ai/nvidia/settings/nvidia_settings.py | 4 +- .../test_nvidia_prompt_execution_settings.py | 29 ++-- .../services/test_nvidia_chat_completion.py | 160 +++++++++++------- .../ai/nvidia/services/test_nvidia_handler.py | 14 +- .../nvidia/settings/test_nvidia_settings.py | 16 +- 11 files changed, 188 insertions(+), 165 deletions(-) diff --git a/python/samples/concepts/setup/chat_completion_services.py b/python/samples/concepts/setup/chat_completion_services.py index b8f7fba86893..918e61c536db 100644 --- a/python/samples/concepts/setup/chat_completion_services.py +++ b/python/samples/concepts/setup/chat_completion_services.py @@ -24,11 +24,11 @@ class Services(str, Enum): BEDROCK = "bedrock" GOOGLE_AI = "google_ai" MISTRAL_AI = "mistral_ai" - NVIDIA = "nvidia" OLLAMA = "ollama" ONNX = "onnx" VERTEX_AI = "vertex_ai" DEEPSEEK = "deepseek" + NVIDIA = "nvidia" service_id = "default" @@ -61,11 +61,11 @@ def get_chat_completion_service_and_request_settings( Services.BEDROCK: lambda: get_bedrock_chat_completion_service_and_request_settings(), Services.GOOGLE_AI: lambda: get_google_ai_chat_completion_service_and_request_settings(), Services.MISTRAL_AI: lambda: get_mistral_ai_chat_completion_service_and_request_settings(), - Services.NVIDIA: lambda: get_nvidia_chat_completion_service_and_request_settings(), Services.OLLAMA: lambda: get_ollama_chat_completion_service_and_request_settings(), Services.ONNX: lambda: get_onnx_chat_completion_service_and_request_settings(), Services.VERTEX_AI: lambda: get_vertex_ai_chat_completion_service_and_request_settings(), Services.DEEPSEEK: lambda: get_deepseek_chat_completion_service_and_request_settings(), + Services.NVIDIA: lambda: get_nvidia_chat_completion_service_and_request_settings(), } # Call the appropriate lambda or function based on the service name @@ -284,30 +284,6 @@ def get_mistral_ai_chat_completion_service_and_request_settings() -> tuple[ return chat_service, request_settings -def get_nvidia_chat_completion_service_and_request_settings() -> tuple[ - "ChatCompletionClientBase", "PromptExecutionSettings" -]: - """Return NVIDIA chat completion service and request settings. - - The service credentials can be read by 3 ways: - 1. Via the constructor - 2. Via the environment variables - 3. Via an environment file - - The request settings control the behavior of the service. The default settings are sufficient to get started. 
- However, you can adjust the settings to suit your needs. - Note: Some of the settings are NOT meant to be set by the user. - Please refer to the Semantic Kernel Python documentation for more information: - https://learn.microsoft.com/en-us/python/api/semantic-kernel/semantic_kernel?view=semantic-kernel-python - """ - from semantic_kernel.connectors.ai.nvidia import NvidiaChatCompletion, NvidiaChatPromptExecutionSettings - - chat_service = NvidiaChatCompletion(service_id=service_id) - request_settings = NvidiaChatPromptExecutionSettings(service_id=service_id) - - return chat_service, request_settings - - def get_ollama_chat_completion_service_and_request_settings() -> tuple[ "ChatCompletionClientBase", "PromptExecutionSettings" ]: @@ -440,3 +416,27 @@ def get_deepseek_chat_completion_service_and_request_settings() -> tuple[ request_settings = OpenAIChatPromptExecutionSettings(service_id=service_id) return chat_service, request_settings + + +def get_nvidia_chat_completion_service_and_request_settings() -> tuple[ + "ChatCompletionClientBase", "PromptExecutionSettings" +]: + """Return NVIDIA chat completion service and request settings. + + The service credentials can be read by 3 ways: + 1. Via the constructor + 2. Via the environment variables + 3. Via an environment file + + The request settings control the behavior of the service. The default settings are sufficient to get started. + However, you can adjust the settings to suit your needs. + Note: Some of the settings are NOT meant to be set by the user. + Please refer to the Semantic Kernel Python documentation for more information: + https://learn.microsoft.com/en-us/python/api/semantic-kernel/semantic_kernel?view=semantic-kernel-python + """ + from semantic_kernel.connectors.ai.nvidia import NvidiaChatCompletion, NvidiaChatPromptExecutionSettings + + chat_service = NvidiaChatCompletion(service_id=service_id) + request_settings = NvidiaChatPromptExecutionSettings(service_id=service_id) + + return chat_service, request_settings diff --git a/python/semantic_kernel/connectors/ai/nvidia/README.md b/python/semantic_kernel/connectors/ai/nvidia/README.md index cddf8734934a..0533f5aa4fa9 100644 --- a/python/semantic_kernel/connectors/ai/nvidia/README.md +++ b/python/semantic_kernel/connectors/ai/nvidia/README.md @@ -47,13 +47,7 @@ kernel.add_service(chat_service) ### Basic chat completion ```python -from semantic_kernel.contents import ChatHistory -from semantic_kernel.contents.utils.author_role import AuthorRole - -chat_history = ChatHistory() -chat_history.add_message(AuthorRole.USER, "Hello, how are you?") -response = await kernel.get_service("nvidia-chat").get_chat_message_content(chat_history) -print(response.content) +response = await kernel.invoke_prompt("Hello, how are you?") ``` ### Using with Chat Completion Agent diff --git a/python/semantic_kernel/connectors/ai/nvidia/prompt_execution_settings/nvidia_prompt_execution_settings.py b/python/semantic_kernel/connectors/ai/nvidia/prompt_execution_settings/nvidia_prompt_execution_settings.py index d35d9a1b6976..85f2d49dce05 100644 --- a/python/semantic_kernel/connectors/ai/nvidia/prompt_execution_settings/nvidia_prompt_execution_settings.py +++ b/python/semantic_kernel/connectors/ai/nvidia/prompt_execution_settings/nvidia_prompt_execution_settings.py @@ -2,7 +2,7 @@ from typing import Annotated, Any, Literal -from pydantic import Field, BaseModel +from pydantic import BaseModel, Field from semantic_kernel.connectors.ai.prompt_execution_settings import PromptExecutionSettings @@ -57,7 
+57,6 @@ class NvidiaChatPromptExecutionSettings(NvidiaPromptExecutionSettings): response_format: ( dict[Literal["type"], Literal["text", "json_object"]] | dict[str, Any] | type[BaseModel] | type | None ) = None - thinking_mode: bool | None = None seed: int | None = None extra_headers: dict | None = None extra_body: dict | None = None @@ -68,7 +67,7 @@ class NvidiaChatPromptExecutionSettings(NvidiaPromptExecutionSettings): def prepare_settings_dict(self, **kwargs) -> dict[str, Any]: """Override only for embeddings to exclude input_type and truncate.""" return self.model_dump( - exclude={"service_id", "extension_data", "structured_json_response", "thinking_mode", "response_format"}, + exclude={"service_id", "extension_data", "structured_json_response", "response_format"}, exclude_none=True, by_alias=True, - ) \ No newline at end of file + ) diff --git a/python/semantic_kernel/connectors/ai/nvidia/services/nvidia_chat_completion.py b/python/semantic_kernel/connectors/ai/nvidia/services/nvidia_chat_completion.py index 302db1de3e32..d3a96a5574de 100644 --- a/python/semantic_kernel/connectors/ai/nvidia/services/nvidia_chat_completion.py +++ b/python/semantic_kernel/connectors/ai/nvidia/services/nvidia_chat_completion.py @@ -3,14 +3,23 @@ import logging import sys from collections.abc import AsyncGenerator -from typing import Any, Literal, TYPE_CHECKING +from typing import TYPE_CHECKING, Any, Literal from openai import AsyncOpenAI -from pydantic import ValidationError - from openai.types.chat.chat_completion import ChatCompletion, Choice -from openai.types.chat.chat_completion_chunk import ChatCompletionChunk, Choice as ChunkChoice +from openai.types.chat.chat_completion_chunk import ChatCompletionChunk +from openai.types.chat.chat_completion_chunk import Choice as ChunkChoice +from pydantic import ValidationError +from semantic_kernel.connectors.ai.chat_completion_client_base import ChatCompletionClientBase +from semantic_kernel.connectors.ai.completion_usage import CompletionUsage +from semantic_kernel.connectors.ai.nvidia.prompt_execution_settings.nvidia_prompt_execution_settings import ( + NvidiaChatPromptExecutionSettings, +) +from semantic_kernel.connectors.ai.nvidia.services.nvidia_handler import NvidiaHandler +from semantic_kernel.connectors.ai.nvidia.services.nvidia_model_types import NvidiaModelTypes +from semantic_kernel.connectors.ai.nvidia.settings.nvidia_settings import NvidiaSettings +from semantic_kernel.connectors.ai.prompt_execution_settings import PromptExecutionSettings from semantic_kernel.contents import ( AuthorRole, ChatMessageContent, @@ -21,38 +30,28 @@ TextContent, ) from semantic_kernel.contents.chat_history import ChatHistory -from semantic_kernel.connectors.ai.completion_usage import CompletionUsage -from semantic_kernel.connectors.ai.nvidia.prompt_execution_settings.nvidia_prompt_execution_settings import ( - NvidiaChatPromptExecutionSettings, -) -from semantic_kernel.connectors.ai.prompt_execution_settings import PromptExecutionSettings +from semantic_kernel.exceptions.service_exceptions import ServiceInitializationError +from semantic_kernel.utils.feature_stage_decorator import experimental from semantic_kernel.utils.telemetry.model_diagnostics.decorators import ( trace_chat_completion, trace_streaming_chat_completion, ) -from semantic_kernel.connectors.ai.chat_completion_client_base import ChatCompletionClientBase -from semantic_kernel.connectors.ai.nvidia.services.nvidia_handler import NvidiaHandler -from 
semantic_kernel.connectors.ai.nvidia.services.nvidia_model_types import NvidiaModelTypes -from semantic_kernel.connectors.ai.nvidia.settings.nvidia_settings import NvidiaSettings -from semantic_kernel.exceptions.service_exceptions import ServiceInitializationError -from semantic_kernel.utils.feature_stage_decorator import experimental - if sys.version_info >= (3, 12): from typing import override # pragma: no cover else: from typing_extensions import override # pragma: no cover if TYPE_CHECKING: - from semantic_kernel.connectors.ai.function_call_choice_configuration import FunctionCallChoiceConfiguration - + pass + logger: logging.Logger = logging.getLogger(__name__) @experimental class NvidiaChatCompletion(NvidiaHandler, ChatCompletionClientBase): """NVIDIA Chat completion class. - + This class does not support function calling. The SUPPORTS_FUNCTION_CALLING attribute is set to False (inherited from the base class). """ @@ -66,7 +65,7 @@ def __init__( client: AsyncOpenAI | None = None, env_file_path: str | None = None, env_file_encoding: str | None = None, - instruction_role: Literal["system", "user", "assistant"] | None = None, + instruction_role: Literal["system", "user", "assistant", "developer"] | None = None, ) -> None: """Initialize an NvidiaChatCompletion service. @@ -82,7 +81,7 @@ def __init__( env_file_path (str | None): Use the environment settings file as a fallback to environment variables. (Optional) env_file_encoding (str | None): The encoding of the environment settings file. (Optional) - instruction_role (Literal["system", "user", "assistant"] | None): The role to use for + instruction_role (Literal["system", "user", "assistant", "developer"] | None): The role to use for 'instruction' messages. Defaults to "system". (Optional) """ try: @@ -121,7 +120,7 @@ def __init__( ) @classmethod - def from_dict(cls : type["NvidiaChatCompletion"], settings: dict[str, Any]) -> "NvidiaChatCompletion": + def from_dict(cls: type["NvidiaChatCompletion"], settings: dict[str, Any]) -> "NvidiaChatCompletion": """Initialize an NVIDIA service from a dictionary of settings. 
Args: @@ -283,9 +282,7 @@ def _get_function_call_from_chat_choice(self, choice: Choice | ChunkChoice) -> l ] return [] - def _handle_structured_output( - self, request_settings: NvidiaChatPromptExecutionSettings - ) -> None: + def _handle_structured_output(self, request_settings: NvidiaChatPromptExecutionSettings) -> None: """Handle structured output for NVIDIA models using nvext parameter.""" response_format = getattr(request_settings, "response_format", None) if response_format: @@ -313,4 +310,4 @@ def _prepare_chat_history_for_request( for message in chat_history.messages: message_dict = {role_key: message.role.value, content_key: message.content} messages.append(message_dict) - return messages \ No newline at end of file + return messages diff --git a/python/semantic_kernel/connectors/ai/nvidia/services/nvidia_handler.py b/python/semantic_kernel/connectors/ai/nvidia/services/nvidia_handler.py index 88e5882f973a..3a3bcf182e57 100644 --- a/python/semantic_kernel/connectors/ai/nvidia/services/nvidia_handler.py +++ b/python/semantic_kernel/connectors/ai/nvidia/services/nvidia_handler.py @@ -6,9 +6,9 @@ from openai import AsyncOpenAI, AsyncStream from openai.types.chat.chat_completion import ChatCompletion +from openai.types.chat.chat_completion_chunk import ChatCompletionChunk from openai.types.completion import Completion from openai.types.create_embedding_response import CreateEmbeddingResponse -from openai.types.chat.chat_completion_chunk import ChatCompletionChunk from semantic_kernel.connectors.ai.nvidia import ( NvidiaPromptExecutionSettings, @@ -39,7 +39,7 @@ async def _send_request(self, settings: PromptExecutionSettings) -> RESPONSE_TYP if self.ai_model_type == NvidiaModelTypes.EMBEDDING: assert isinstance(settings, NvidiaPromptExecutionSettings) # nosec return await self._send_embedding_request(settings) - elif self.ai_model_type == NvidiaModelTypes.CHAT: + if self.ai_model_type == NvidiaModelTypes.CHAT: assert isinstance(settings, NvidiaPromptExecutionSettings) # nosec return await self._send_chat_completion_request(settings) @@ -59,17 +59,19 @@ async def _send_embedding_request(self, settings: NvidiaPromptExecutionSettings) ex, ) from ex - async def _send_chat_completion_request(self, settings: NvidiaPromptExecutionSettings) -> ChatCompletion | AsyncStream[Any]: + async def _send_chat_completion_request( + self, settings: NvidiaPromptExecutionSettings + ) -> ChatCompletion | AsyncStream[Any]: """Send a request to the NVIDIA chat completion endpoint.""" try: settings_dict = settings.prepare_settings_dict() - + # Handle structured output if nvext is present in extra_body if settings.extra_body and "nvext" in settings.extra_body: if "extra_body" not in settings_dict: settings_dict["extra_body"] = {} settings_dict["extra_body"]["nvext"] = settings.extra_body["nvext"] - + response = await self.client.chat.completions.create(**settings_dict) self.store_usage(response) return response diff --git a/python/semantic_kernel/connectors/ai/nvidia/services/nvidia_model_types.py b/python/semantic_kernel/connectors/ai/nvidia/services/nvidia_model_types.py index 7dc37d4f9de3..e9222f36a6dd 100644 --- a/python/semantic_kernel/connectors/ai/nvidia/services/nvidia_model_types.py +++ b/python/semantic_kernel/connectors/ai/nvidia/services/nvidia_model_types.py @@ -7,4 +7,4 @@ class NvidiaModelTypes(Enum): """Nvidia model types, can be text, chat, or embedding.""" EMBEDDING = "embedding" - CHAT = "chat" \ No newline at end of file + CHAT = "chat" diff --git 
a/python/semantic_kernel/connectors/ai/nvidia/settings/nvidia_settings.py b/python/semantic_kernel/connectors/ai/nvidia/settings/nvidia_settings.py index 68616954823e..35e2e2f05cbe 100644 --- a/python/semantic_kernel/connectors/ai/nvidia/settings/nvidia_settings.py +++ b/python/semantic_kernel/connectors/ai/nvidia/settings/nvidia_settings.py @@ -29,7 +29,7 @@ class NvidiaSettings(KernelBaseSettings): env_prefix: ClassVar[str] = "NVIDIA_" - api_key: SecretStr | None = None + api_key: SecretStr | None = None base_url: str = "https://integrate.api.nvidia.com/v1" embedding_model_id: str | None = None - chat_model_id: str | None = None + chat_model_id: str | None = None diff --git a/python/tests/unit/connectors/ai/nvidia/prompt_execution_settings/test_nvidia_prompt_execution_settings.py b/python/tests/unit/connectors/ai/nvidia/prompt_execution_settings/test_nvidia_prompt_execution_settings.py index 291bb8a20a12..2f3851b6a0c1 100644 --- a/python/tests/unit/connectors/ai/nvidia/prompt_execution_settings/test_nvidia_prompt_execution_settings.py +++ b/python/tests/unit/connectors/ai/nvidia/prompt_execution_settings/test_nvidia_prompt_execution_settings.py @@ -4,9 +4,9 @@ from pydantic import BaseModel, ValidationError from semantic_kernel.connectors.ai.nvidia.prompt_execution_settings.nvidia_prompt_execution_settings import ( - NvidiaPromptExecutionSettings, NvidiaChatPromptExecutionSettings, NvidiaEmbeddingPromptExecutionSettings, + NvidiaPromptExecutionSettings, ) @@ -36,14 +36,14 @@ def test_validation_temperature_range(self): # Valid values settings = NvidiaPromptExecutionSettings(temperature=0.0) assert settings.temperature == 0.0 - + settings = NvidiaPromptExecutionSettings(temperature=2.0) assert settings.temperature == 2.0 - + # Invalid values with pytest.raises(ValidationError): NvidiaPromptExecutionSettings(temperature=-0.1) - + with pytest.raises(ValidationError): NvidiaPromptExecutionSettings(temperature=2.1) @@ -60,23 +60,20 @@ def test_init_with_defaults(self): def test_response_format_with_pydantic_model(self): """Test response_format with Pydantic model.""" + class TestModel(BaseModel): name: str value: int - settings = NvidiaChatPromptExecutionSettings( - response_format=TestModel - ) - + settings = NvidiaChatPromptExecutionSettings(response_format=TestModel) + assert settings.response_format == TestModel assert settings.structured_json_response is True def test_response_format_with_dict(self): """Test response_format with dictionary.""" - settings = NvidiaChatPromptExecutionSettings( - response_format={"type": "json_object"} - ) - + settings = NvidiaChatPromptExecutionSettings(response_format={"type": "json_object"}) + assert settings.response_format == {"type": "json_object"} assert settings.structured_json_response is False @@ -100,7 +97,7 @@ def test_init_with_values(self): input_type="passage", truncate="START", ) - + assert settings.input == ["hello", "world"] assert settings.encoding_format == "base64" assert settings.input_type == "passage" @@ -111,10 +108,10 @@ def test_validation_encoding_format(self): # Valid values settings = NvidiaEmbeddingPromptExecutionSettings(encoding_format="float") assert settings.encoding_format == "float" - + settings = NvidiaEmbeddingPromptExecutionSettings(encoding_format="base64") assert settings.encoding_format == "base64" - + # Invalid values with pytest.raises(ValidationError): - NvidiaEmbeddingPromptExecutionSettings(encoding_format="invalid") \ No newline at end of file + NvidiaEmbeddingPromptExecutionSettings(encoding_format="invalid") 
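Editor's note on the settings classes exercised by the tests above: the effect of `prepare_settings_dict()` is easiest to see from the payload it produces. The following is a minimal sketch, not part of the patch; the sample values are made up, and the behavior shown follows the `model_dump(...)` call in the hunks above.

```python
# Sketch only (not part of this patch): how NvidiaChatPromptExecutionSettings
# serializes into a request payload after this change. Sample values are illustrative.
from semantic_kernel.connectors.ai.nvidia import NvidiaChatPromptExecutionSettings

settings = NvidiaChatPromptExecutionSettings(
    ai_model_id="meta/llama-3.1-8b-instruct",  # emitted under the "model" alias
    temperature=0.2,
    max_tokens=256,
)

payload = settings.prepare_settings_dict()

# service_id, extension_data, structured_json_response and response_format are
# excluded, and None-valued fields are dropped before the request is sent.
assert payload["model"] == "meta/llama-3.1-8b-instruct"
assert payload["temperature"] == 0.2
assert "response_format" not in payload
assert "service_id" not in payload
```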
diff --git a/python/tests/unit/connectors/ai/nvidia/services/test_nvidia_chat_completion.py b/python/tests/unit/connectors/ai/nvidia/services/test_nvidia_chat_completion.py index 22e563416fee..05fde910eb11 100644 --- a/python/tests/unit/connectors/ai/nvidia/services/test_nvidia_chat_completion.py +++ b/python/tests/unit/connectors/ai/nvidia/services/test_nvidia_chat_completion.py @@ -1,7 +1,12 @@ # Copyright (c) Microsoft. All rights reserved. +from unittest.mock import AsyncMock, patch + import pytest -from unittest.mock import AsyncMock, MagicMock, patch +from openai.resources.chat.completions import AsyncCompletions +from openai.types.chat import ChatCompletion, ChatCompletionMessage +from openai.types.chat.chat_completion import Choice +from openai.types.completion_usage import CompletionUsage from pydantic import BaseModel from semantic_kernel.connectors.ai.nvidia import NvidiaChatCompletion @@ -9,106 +14,137 @@ NvidiaChatPromptExecutionSettings, ) from semantic_kernel.contents import ChatHistory -from semantic_kernel.contents.author_role import AuthorRole +from semantic_kernel.exceptions import ServiceInitializationError, ServiceResponseException @pytest.fixture -def mock_openai_client(): - """Create a mock OpenAI client.""" - with patch("semantic_kernel.connectors.ai.nvidia.services.nvidia_chat_completion.AsyncOpenAI") as mock_client: - mock_client.return_value = AsyncMock() - yield mock_client.return_value +def nvidia_unit_test_env(monkeypatch, exclude_list, override_env_param_dict): + """Fixture to set environment variables for NvidiaChatCompletion.""" + if exclude_list is None: + exclude_list = [] + if override_env_param_dict is None: + override_env_param_dict = {} -@pytest.fixture -def nvidia_chat_completion(mock_openai_client): - """Create a NvidiaChatCompletion instance with mocked client.""" - return NvidiaChatCompletion( - ai_model_id="meta/llama-3.1-8b-instruct", - api_key="test-api-key", + env_vars = {"NVIDIA_API_KEY": "test_api_key", "NVIDIA_CHAT_MODEL_ID": "meta/llama-3.1-8b-instruct"} + + env_vars.update(override_env_param_dict) + + for key, value in env_vars.items(): + if key not in exclude_list: + monkeypatch.setenv(key, value) + else: + monkeypatch.delenv(key, raising=False) + + return env_vars + + +def _create_mock_chat_completion(content: str = "Hello!") -> ChatCompletion: + """Helper function to create a mock ChatCompletion response.""" + message = ChatCompletionMessage(role="assistant", content=content) + choice = Choice( + finish_reason="stop", + index=0, + message=message, + ) + usage = CompletionUsage(completion_tokens=20, prompt_tokens=10, total_tokens=30) + return ChatCompletion( + id="test-id", + choices=[choice], + created=1234567890, + model="meta/llama-3.1-8b-instruct", + object="chat.completion", + usage=usage, ) class TestNvidiaChatCompletion: """Test cases for NvidiaChatCompletion.""" - def test_init_with_defaults(self): + def test_init_with_defaults(self, nvidia_unit_test_env): """Test initialization with default values.""" - with patch("semantic_kernel.connectors.ai.nvidia.services.nvidia_chat_completion.AsyncOpenAI"): - service = NvidiaChatCompletion(api_key="test-key") - assert service.ai_model_id == "meta/llama-3.1-8b-instruct" + service = NvidiaChatCompletion() + assert service.ai_model_id == nvidia_unit_test_env["NVIDIA_CHAT_MODEL_ID"] - def test_get_prompt_execution_settings_class(self, nvidia_chat_completion): + def test_get_prompt_execution_settings_class(self, nvidia_unit_test_env): """Test getting the prompt execution settings class.""" - 
from semantic_kernel.connectors.ai.nvidia.prompt_execution_settings.nvidia_prompt_execution_settings import NvidiaChatPromptExecutionSettings - assert nvidia_chat_completion.get_prompt_execution_settings_class() == NvidiaChatPromptExecutionSettings + service = NvidiaChatCompletion() + from semantic_kernel.connectors.ai.nvidia.prompt_execution_settings.nvidia_prompt_execution_settings import ( + NvidiaChatPromptExecutionSettings, + ) + + assert service.get_prompt_execution_settings_class() == NvidiaChatPromptExecutionSettings + + @pytest.mark.parametrize("exclude_list", [["NVIDIA_API_KEY"]], indirect=True) + def test_init_with_empty_api_key(self, nvidia_unit_test_env): + """Test initialization fails with empty API key.""" + with pytest.raises(ServiceInitializationError): + NvidiaChatCompletion() + + @pytest.mark.parametrize("exclude_list", [["NVIDIA_CHAT_MODEL_ID"]], indirect=True) + def test_init_with_empty_model_id(self, nvidia_unit_test_env): + """Test initialization with empty model ID uses default.""" + service = NvidiaChatCompletion() + assert service.ai_model_id == "meta/llama-3.1-8b-instruct" + + def test_init_with_custom_model_id(self, nvidia_unit_test_env): + """Test initialization with custom model ID.""" + custom_model = "custom/nvidia-model" + service = NvidiaChatCompletion(ai_model_id=custom_model) + assert service.ai_model_id == custom_model @pytest.mark.asyncio - async def test_get_chat_message_contents(self, nvidia_chat_completion, mock_openai_client): + @patch.object(AsyncCompletions, "create", new_callable=AsyncMock) + async def test_get_chat_message_contents(self, mock_create, nvidia_unit_test_env): """Test basic chat completion.""" - # Mock the response - mock_response = MagicMock() - mock_response.choices = [ - MagicMock( - message=MagicMock(role="assistant", content="Hello!"), - finish_reason="stop", - ) - ] - mock_response.usage = MagicMock(prompt_tokens=10, completion_tokens=20, total_tokens=30) - mock_openai_client.chat.completions.create.return_value = mock_response - - # Test + mock_create.return_value = _create_mock_chat_completion("Hello!") + + service = NvidiaChatCompletion() chat_history = ChatHistory() - chat_history.add_message(AuthorRole.USER, "Hello") + chat_history.add_user_message("Hello") settings = NvidiaChatPromptExecutionSettings() - - result = await nvidia_chat_completion.get_chat_message_contents(chat_history, settings) - + + result = await service.get_chat_message_contents(chat_history, settings) + assert len(result) == 1 assert result[0].content == "Hello!" 
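For reviewers reading the structured-output test that follows, the end-user flow it exercises looks roughly like this. A hedged sketch, assuming the env-var based construction used by the fixture above; the `Answer` model and the prompt are placeholders, not code from the patch.

```python
# Sketch of the guided-JSON flow (illustrative names; assumes NVIDIA_API_KEY
# and NVIDIA_CHAT_MODEL_ID are set, as in the fixture above).
from pydantic import BaseModel

from semantic_kernel.connectors.ai.nvidia import (
    NvidiaChatCompletion,
    NvidiaChatPromptExecutionSettings,
)
from semantic_kernel.contents import ChatHistory


class Answer(BaseModel):
    name: str
    value: int


async def ask_structured() -> Answer:
    service = NvidiaChatCompletion()
    settings = NvidiaChatPromptExecutionSettings(response_format=Answer)

    history = ChatHistory()
    history.add_user_message("Give me structured data")

    # The connector converts response_format into extra_body["nvext"]["guided_json"]
    # before calling chat.completions.create(), which is what the test verifies.
    result = await service.get_chat_message_contents(history, settings)
    return Answer.model_validate_json(result[0].content)
```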
@pytest.mark.asyncio - async def test_structured_output_with_pydantic_model(self, nvidia_chat_completion, mock_openai_client): + @patch.object(AsyncCompletions, "create", new_callable=AsyncMock) + async def test_structured_output_with_pydantic_model(self, mock_create, nvidia_unit_test_env): """Test structured output with Pydantic model.""" + # Define test model class TestModel(BaseModel): name: str value: int - # Mock response - mock_response = MagicMock() - mock_response.choices = [ - MagicMock( - message=MagicMock(role="assistant", content='{"name": "test", "value": 42}'), - finish_reason="stop", - ) - ] - mock_response.usage = MagicMock(prompt_tokens=10, completion_tokens=20, total_tokens=30) - mock_openai_client.chat.completions.create.return_value = mock_response - - # Test + mock_create.return_value = _create_mock_chat_completion('{"name": "test", "value": 42}') + + service = NvidiaChatCompletion() chat_history = ChatHistory() - chat_history.add_message(AuthorRole.USER, "Give me structured data") + chat_history.add_user_message("Give me structured data") settings = NvidiaChatPromptExecutionSettings() settings.response_format = TestModel - - result = await nvidia_chat_completion.get_chat_message_contents(chat_history, settings) - + + await service.get_chat_message_contents(chat_history, settings) + # Verify nvext was passed - call_args = mock_openai_client.chat.completions.create.call_args[1] + call_args = mock_create.call_args[1] assert "extra_body" in call_args assert "nvext" in call_args["extra_body"] assert "guided_json" in call_args["extra_body"]["nvext"] @pytest.mark.asyncio - async def test_error_handling(self, nvidia_chat_completion, mock_openai_client): + @patch.object(AsyncCompletions, "create", new_callable=AsyncMock) + async def test_error_handling(self, mock_create, nvidia_unit_test_env): """Test error handling.""" - mock_openai_client.chat.completions.create.side_effect = Exception("API Error") - + mock_create.side_effect = Exception("API Error") + + service = NvidiaChatCompletion() chat_history = ChatHistory() - chat_history.add_message(AuthorRole.USER, "Hello") + chat_history.add_user_message("Hello") settings = NvidiaChatPromptExecutionSettings() - - from semantic_kernel.exceptions import ServiceResponseException + with pytest.raises(ServiceResponseException): - await nvidia_chat_completion.get_chat_message_contents(chat_history, settings) \ No newline at end of file + await service.get_chat_message_contents(chat_history, settings) diff --git a/python/tests/unit/connectors/ai/nvidia/services/test_nvidia_handler.py b/python/tests/unit/connectors/ai/nvidia/services/test_nvidia_handler.py index ddb3e766eb1a..137d8bfdb044 100644 --- a/python/tests/unit/connectors/ai/nvidia/services/test_nvidia_handler.py +++ b/python/tests/unit/connectors/ai/nvidia/services/test_nvidia_handler.py @@ -1,22 +1,22 @@ # Copyright (c) Microsoft. All rights reserved. 
+from unittest.mock import AsyncMock, MagicMock + import pytest -from unittest.mock import AsyncMock, MagicMock, patch from openai import AsyncOpenAI -from semantic_kernel.connectors.ai.nvidia.services.nvidia_handler import NvidiaHandler -from semantic_kernel.connectors.ai.nvidia.services.nvidia_model_types import NvidiaModelTypes from semantic_kernel.connectors.ai.nvidia.prompt_execution_settings.nvidia_prompt_execution_settings import ( NvidiaChatPromptExecutionSettings, NvidiaEmbeddingPromptExecutionSettings, ) +from semantic_kernel.connectors.ai.nvidia.services.nvidia_handler import NvidiaHandler +from semantic_kernel.connectors.ai.nvidia.services.nvidia_model_types import NvidiaModelTypes @pytest.fixture def mock_openai_client(): """Create a mock OpenAI client.""" - client = AsyncMock(spec=AsyncOpenAI) - return client + return AsyncMock(spec=AsyncOpenAI) @pytest.fixture @@ -41,7 +41,7 @@ def test_init(self, mock_openai_client): ai_model_id="test-model", api_key="test-key", ) - + assert handler.client == mock_openai_client assert handler.ai_model_type == NvidiaModelTypes.CHAT assert handler.ai_model_id == "test-model" @@ -145,4 +145,4 @@ async def test_send_request_unsupported_model_type(self, nvidia_handler): ) with pytest.raises(NotImplementedError, match="Model type UNSUPPORTED is not supported"): - await nvidia_handler._send_request(settings) \ No newline at end of file + await nvidia_handler._send_request(settings) diff --git a/python/tests/unit/connectors/ai/nvidia/settings/test_nvidia_settings.py b/python/tests/unit/connectors/ai/nvidia/settings/test_nvidia_settings.py index 739a8f4e9c15..4ac818ad480d 100644 --- a/python/tests/unit/connectors/ai/nvidia/settings/test_nvidia_settings.py +++ b/python/tests/unit/connectors/ai/nvidia/settings/test_nvidia_settings.py @@ -1,7 +1,5 @@ # Copyright (c) Microsoft. All rights reserved. 
-import pytest -from pydantic import ValidationError from semantic_kernel.connectors.ai.nvidia.settings.nvidia_settings import NvidiaSettings @@ -25,7 +23,7 @@ def test_init_with_values(self): embedding_model_id="test-embedding-model", chat_model_id="test-chat-model", ) - + assert settings.api_key.get_secret_value() == "test-api-key" assert settings.base_url == "https://custom.nvidia.com/v1" assert settings.embedding_model_id == "test-embedding-model" @@ -38,11 +36,11 @@ def test_env_prefix(self): def test_api_key_secret_str(self): """Test that api_key is properly handled as SecretStr.""" settings = NvidiaSettings(api_key="secret-key") - + # Should be SecretStr type - assert hasattr(settings.api_key, 'get_secret_value') + assert hasattr(settings.api_key, "get_secret_value") assert settings.api_key.get_secret_value() == "secret-key" - + # Should not expose the secret in string representation str_repr = str(settings) assert "secret-key" not in str_repr @@ -51,8 +49,8 @@ def test_environment_variables(self, monkeypatch): """Test that environment variables override defaults.""" monkeypatch.setenv("NVIDIA_API_KEY", "env-key") monkeypatch.setenv("NVIDIA_CHAT_MODEL_ID", "env-chat") - + settings = NvidiaSettings() - + assert settings.api_key.get_secret_value() == "env-key" - assert settings.chat_model_id == "env-chat" \ No newline at end of file + assert settings.chat_model_id == "env-chat" From ac71d56cb688828cf0214963ea3e0c093d1293cc Mon Sep 17 00:00:00 2001 From: Soumili Nandi Date: Wed, 10 Sep 2025 10:45:01 -0700 Subject: [PATCH 07/11] documentation for default chat model --- python/samples/concepts/setup/ALL_SETTINGS.md | 2 +- .../ai/nvidia/services/nvidia_chat_completion.py | 9 ++++++--- .../connectors/ai/nvidia/settings/nvidia_settings.py | 2 ++ .../ai/nvidia/services/test_nvidia_chat_completion.py | 3 ++- 4 files changed, 11 insertions(+), 5 deletions(-) diff --git a/python/samples/concepts/setup/ALL_SETTINGS.md b/python/samples/concepts/setup/ALL_SETTINGS.md index d185d1b92cf5..cb7ea81af670 100644 --- a/python/samples/concepts/setup/ALL_SETTINGS.md +++ b/python/samples/concepts/setup/ALL_SETTINGS.md @@ -30,7 +30,7 @@ | | [VertexAITextEmbedding](../../../semantic_kernel/connectors/ai/google/google_ai/services/google_ai_text_embedding.py) | project_id,
region,<br>embedding_model_id | VERTEX_AI_PROJECT_ID,<br>VERTEX_AI_REGION,<br>VERTEX_AI_EMBEDDING_MODEL_ID | Yes,<br>No,<br>Yes | |
 | HuggingFace | [HuggingFaceTextCompletion](../../../semantic_kernel/connectors/ai/hugging_face/services/hf_text_completion.py) | ai_model_id | N/A | Yes | |
 | | [HuggingFaceTextEmbedding](../../../semantic_kernel/connectors/ai/hugging_face/services/hf_text_embedding.py) | ai_model_id | N/A | Yes | |
-| NVIDIA NIM | [NvidiaChatCompletion](../../../semantic_kernel/connectors/ai/nvidia/services/nvidia_chat_completion.py) | ai_model_id,<br>api_key,<br>base_url | NVIDIA_CHAT_MODEL_ID,<br>NVIDIA_API_KEY,<br>NVIDIA_BASE_URL | Yes,<br>Yes,<br>No | [NvidiaAISettings](../../../semantic_kernel/connectors/ai/nvidia/settings/nvidia_settings.py) |
+| NVIDIA NIM | [NvidiaChatCompletion](../../../semantic_kernel/connectors/ai/nvidia/services/nvidia_chat_completion.py) | ai_model_id,<br>api_key,<br>base_url | NVIDIA_CHAT_MODEL_ID,<br>NVIDIA_API_KEY,<br>NVIDIA_BASE_URL | Yes (default: meta/llama-3.1-8b-instruct),<br>Yes,<br>No | [NvidiaAISettings](../../../semantic_kernel/connectors/ai/nvidia/settings/nvidia_settings.py) |
 | | [NvidiaTextEmbedding](../../../semantic_kernel/connectors/ai/nvidia/services/nvidia_text_embedding.py) | ai_model_id,<br>api_key,<br>base_url | NVIDIA_API_KEY,<br>NVIDIA_TEXT_EMBEDDING_MODEL_ID,<br>NVIDIA_BASE_URL | Yes | [NvidiaAISettings](../../../semantic_kernel/connectors/ai/nvidia/settings/nvidia_settings.py) |
 | Mistral AI | [MistralAIChatCompletion](../../../semantic_kernel/connectors/ai/mistral_ai/services/mistral_ai_chat_completion.py) | ai_model_id,<br>api_key | MISTRALAI_CHAT_MODEL_ID,<br>MISTRALAI_API_KEY | Yes,<br>Yes | [MistralAISettings](../../../semantic_kernel/connectors/ai/mistral_ai/settings/mistral_ai_settings.py) |
 | | [MistralAITextEmbedding](../../../semantic_kernel/connectors/ai/mistral_ai/services/mistral_ai_text_embedding.py) | ai_model_id,<br>api_key | MISTRALAI_EMBEDDING_MODEL_ID,<br>MISTRALAI_API_KEY | Yes,<br>Yes | |
diff --git a/python/semantic_kernel/connectors/ai/nvidia/services/nvidia_chat_completion.py b/python/semantic_kernel/connectors/ai/nvidia/services/nvidia_chat_completion.py
index d3a96a5574de..da324ba9a1a9 100644
--- a/python/semantic_kernel/connectors/ai/nvidia/services/nvidia_chat_completion.py
+++ b/python/semantic_kernel/connectors/ai/nvidia/services/nvidia_chat_completion.py
@@ -47,6 +47,9 @@
 logger: logging.Logger = logging.getLogger(__name__)
 
+# Default NVIDIA chat model when none is specified
+DEFAULT_NVIDIA_CHAT_MODEL = "meta/llama-3.1-8b-instruct"
+
 
 @experimental
 class NvidiaChatCompletion(NvidiaHandler, ChatCompletionClientBase):
@@ -72,7 +75,7 @@ def __init__(
         Args:
             ai_model_id (str): NVIDIA model name, see https://docs.api.nvidia.com/nim/reference/
-                If not provided, defaults to "meta/llama-3.1-8b-instruct".
+                If not provided, defaults to DEFAULT_NVIDIA_CHAT_MODEL.
             service_id (str | None): Service ID tied to the execution settings.
             api_key (str | None): The optional API key to use. If provided will override,
                 the env vars or .env file value.
@@ -98,8 +101,8 @@ def __init__(
         if not client and not nvidia_settings.api_key:
             raise ServiceInitializationError("The NVIDIA API key is required.")
         if not nvidia_settings.chat_model_id:
-            # Default fallback model: meta/llama-3.1-8b-instruct
-            nvidia_settings.chat_model_id = "meta/llama-3.1-8b-instruct"
+            # Default fallback model
+            nvidia_settings.chat_model_id = DEFAULT_NVIDIA_CHAT_MODEL
             logger.warning(f"Default chat model set as: {nvidia_settings.chat_model_id}")
 
         # Create client if not provided
diff --git a/python/semantic_kernel/connectors/ai/nvidia/settings/nvidia_settings.py b/python/semantic_kernel/connectors/ai/nvidia/settings/nvidia_settings.py
index 35e2e2f05cbe..0c7eb37641d3 100644
--- a/python/semantic_kernel/connectors/ai/nvidia/settings/nvidia_settings.py
+++ b/python/semantic_kernel/connectors/ai/nvidia/settings/nvidia_settings.py
@@ -24,6 +24,8 @@ class NvidiaSettings(KernelBaseSettings):
         (Env var NVIDIA_BASE_URL)
     - embedding_model_id: str | None - The NVIDIA embedding model ID to use, for example, nvidia/nv-embed-v1.
         (Env var NVIDIA_EMBEDDING_MODEL_ID)
+    - chat_model_id: str | None - The NVIDIA chat model ID to use.
+        (Env var NVIDIA_CHAT_MODEL_ID)
     - env_file_path: if provided, the .env settings are read from this file path location
     """
diff --git a/python/tests/unit/connectors/ai/nvidia/services/test_nvidia_chat_completion.py b/python/tests/unit/connectors/ai/nvidia/services/test_nvidia_chat_completion.py
index 05fde910eb11..a4008f9a1c05 100644
--- a/python/tests/unit/connectors/ai/nvidia/services/test_nvidia_chat_completion.py
+++ b/python/tests/unit/connectors/ai/nvidia/services/test_nvidia_chat_completion.py
@@ -13,6 +13,7 @@
 from semantic_kernel.connectors.ai.nvidia.prompt_execution_settings.nvidia_prompt_execution_settings import (
     NvidiaChatPromptExecutionSettings,
 )
+from semantic_kernel.connectors.ai.nvidia.services.nvidia_chat_completion import DEFAULT_NVIDIA_CHAT_MODEL
 from semantic_kernel.contents import ChatHistory
 from semantic_kernel.exceptions import ServiceInitializationError, ServiceResponseException
@@ -85,7 +86,7 @@ def test_init_with_empty_api_key(self, nvidia_unit_test_env):
     def test_init_with_empty_model_id(self, nvidia_unit_test_env):
         """Test initialization with empty model ID uses default."""
         service = NvidiaChatCompletion()
-        assert service.ai_model_id == "meta/llama-3.1-8b-instruct"
+        assert service.ai_model_id == DEFAULT_NVIDIA_CHAT_MODEL
 
     def test_init_with_custom_model_id(self, nvidia_unit_test_env):
         """Test initialization with custom model ID."""

From 8321b8dfad252cbb91e23a247d16ff171b5d224f Mon Sep 17 00:00:00 2001
From: Soumili Nandi
Date: Wed, 10 Sep 2025 14:37:53 -0700
Subject: [PATCH 08/11] removing unused imports

---
 .../connectors/ai/nvidia/services/nvidia_chat_completion.py | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/python/semantic_kernel/connectors/ai/nvidia/services/nvidia_chat_completion.py b/python/semantic_kernel/connectors/ai/nvidia/services/nvidia_chat_completion.py
index da324ba9a1a9..fdd2ae0dce0b 100644
--- a/python/semantic_kernel/connectors/ai/nvidia/services/nvidia_chat_completion.py
+++ b/python/semantic_kernel/connectors/ai/nvidia/services/nvidia_chat_completion.py
@@ -3,7 +3,7 @@
 import logging
 import sys
 from collections.abc import AsyncGenerator
-from typing import TYPE_CHECKING, Any, Literal
+from typing import Any, Literal
 
 from openai import AsyncOpenAI
@@ -42,9 +42,6 @@
 else:
     from typing_extensions import override  # pragma: no cover
 
-if TYPE_CHECKING:
-    pass
-
 logger: logging.Logger = logging.getLogger(__name__)
 
 # Default NVIDIA chat model when none is specified

From 3ff90c83ed046899a77745e719d1177895f108aa Mon Sep 17 00:00:00 2001
From: Soumili Nandi
Date: Wed, 10 Sep 2025 14:41:16 -0700
Subject: [PATCH 09/11] test fixes

---
 .../ai/nvidia/services/test_nvidia_handler.py | 22 +++++++++++--------
 1 file changed, 13 insertions(+), 9 deletions(-)

diff --git a/python/tests/unit/connectors/ai/nvidia/services/test_nvidia_handler.py b/python/tests/unit/connectors/ai/nvidia/services/test_nvidia_handler.py
index 137d8bfdb044..b83aad461aa1 100644
--- a/python/tests/unit/connectors/ai/nvidia/services/test_nvidia_handler.py
+++ b/python/tests/unit/connectors/ai/nvidia/services/test_nvidia_handler.py
@@ -38,13 +38,10 @@ def test_init(self, mock_openai_client):
         handler = NvidiaHandler(
             client=mock_openai_client,
             ai_model_type=NvidiaModelTypes.CHAT,
-            ai_model_id="test-model",
-            api_key="test-key",
         )
 
         assert handler.client == mock_openai_client
         assert handler.ai_model_type == NvidiaModelTypes.CHAT
-        assert handler.ai_model_id == "test-model"
         assert handler.MODEL_PROVIDER_NAME == "nvidia"
 
     @pytest.mark.asyncio
@@ -59,7 +56,7 @@ async def test_send_chat_completion_request(self, nvidia_handler, mock_openai_cl
             )
         ]
         mock_response.usage = MagicMock(prompt_tokens=10, completion_tokens=20, total_tokens=30)
-        mock_openai_client.chat.completions.create.return_value = mock_response
+        mock_openai_client.chat.completions.create = AsyncMock(return_value=mock_response)
 
         # Create settings
         settings = NvidiaChatPromptExecutionSettings(
@@ -88,7 +85,7 @@ async def test_send_chat_completion_request_with_nvext(self, nvidia_handler, moc
             )
         ]
         mock_response.usage = MagicMock(prompt_tokens=10, completion_tokens=20, total_tokens=30)
-        mock_openai_client.chat.completions.create.return_value = mock_response
+        mock_openai_client.chat.completions.create = AsyncMock(return_value=mock_response)
 
         # Create settings with nvext
         settings = NvidiaChatPromptExecutionSettings(
@@ -123,7 +120,7 @@ async def test_send_embedding_request(self, mock_openai_client):
             MagicMock(embedding=[0.4, 0.5, 0.6]),
         ]
         mock_response.usage = MagicMock(prompt_tokens=10, total_tokens=10)
-        mock_openai_client.embeddings.create.return_value = mock_response
+        mock_openai_client.embeddings.create = AsyncMock(return_value=mock_response)
 
         # Create settings
         settings = NvidiaEmbeddingPromptExecutionSettings(
@@ -136,13 +133,20 @@ async def test_send_embedding_request(self, mock_openai_client):
         assert result == [[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]]
 
     @pytest.mark.asyncio
-    async def test_send_request_unsupported_model_type(self, nvidia_handler):
+    async def test_send_request_unsupported_model_type(self, mock_openai_client):
         """Test send_request with unsupported model type."""
-        nvidia_handler.ai_model_type = "UNSUPPORTED"
+        # Create a handler with invalid model type by bypassing validation
+        handler = NvidiaHandler(
+            client=mock_openai_client,
+            ai_model_type=NvidiaModelTypes.CHAT,
+        )
+        # Manually set the attribute to bypass Pydantic validation
+        object.__setattr__(handler, 'ai_model_type', "UNSUPPORTED")
+
         settings = NvidiaChatPromptExecutionSettings(
             messages=[{"role": "user", "content": "Hello"}],
             model="test-model",
         )
 
         with pytest.raises(NotImplementedError, match="Model type UNSUPPORTED is not supported"):
-            await nvidia_handler._send_request(settings)
+            await handler._send_request(settings)

From 70182ce1990cb2f2efda758bdff804884c88dfb6 Mon Sep 17 00:00:00 2001
From: Soumili Nandi
Date: Thu, 11 Sep 2025 08:35:05 -0700
Subject: [PATCH 10/11] fixed unit test

---
 .../test_nvidia_prompt_execution_settings.py | 36 ++++++-------------
 .../ai/nvidia/services/test_nvidia_handler.py | 4 +--
 2 files changed, 12 insertions(+), 28 deletions(-)

diff --git a/python/tests/unit/connectors/ai/nvidia/prompt_execution_settings/test_nvidia_prompt_execution_settings.py b/python/tests/unit/connectors/ai/nvidia/prompt_execution_settings/test_nvidia_prompt_execution_settings.py
index 2f3851b6a0c1..8baf734bc3a5 100644
--- a/python/tests/unit/connectors/ai/nvidia/prompt_execution_settings/test_nvidia_prompt_execution_settings.py
+++ b/python/tests/unit/connectors/ai/nvidia/prompt_execution_settings/test_nvidia_prompt_execution_settings.py
@@ -16,36 +16,23 @@ class TestNvidiaPromptExecutionSettings:
     def test_init_with_defaults(self):
         """Test initialization with default values."""
         settings = NvidiaPromptExecutionSettings()
-        assert settings.ai_model_id is None
-        assert settings.temperature is None
-        assert settings.stream is False
+        assert settings.format is None
+        assert settings.options is None
 
     def test_init_with_values(self):
         """Test initialization with specific values."""
         settings = NvidiaPromptExecutionSettings(
-            ai_model_id="test-model",
-            temperature=0.7,
-            max_tokens=100,
+            format="json",
+            options={"key": "value"},
         )
-        assert settings.ai_model_id == "test-model"
-        assert settings.temperature == 0.7
-        assert settings.max_tokens == 100
+        assert settings.format == "json"
+        assert settings.options == {"key": "value"}
 
-    def test_validation_temperature_range(self):
-        """Test temperature validation range."""
+    def test_validation_format_values(self):
+        """Test format validation values."""
         # Valid values
-        settings = NvidiaPromptExecutionSettings(temperature=0.0)
-        assert settings.temperature == 0.0
-
-        settings = NvidiaPromptExecutionSettings(temperature=2.0)
-        assert settings.temperature == 2.0
-
-        # Invalid values
-        with pytest.raises(ValidationError):
-            NvidiaPromptExecutionSettings(temperature=-0.1)
-
-        with pytest.raises(ValidationError):
-            NvidiaPromptExecutionSettings(temperature=2.1)
+        settings = NvidiaPromptExecutionSettings(format="json")
+        assert settings.format == "json"
 
 
 class TestNvidiaChatPromptExecutionSettings:
@@ -56,7 +43,6 @@ def test_init_with_defaults(self):
         settings = NvidiaChatPromptExecutionSettings()
         assert settings.messages is None
         assert settings.response_format is None
-        assert settings.structured_json_response is False
 
     def test_response_format_with_pydantic_model(self):
         """Test response_format with Pydantic model."""
@@ -68,14 +54,12 @@ class TestModel(BaseModel):
         settings = NvidiaChatPromptExecutionSettings(response_format=TestModel)
 
         assert settings.response_format == TestModel
-        assert settings.structured_json_response is True
 
     def test_response_format_with_dict(self):
         """Test response_format with dictionary."""
         settings = NvidiaChatPromptExecutionSettings(response_format={"type": "json_object"})
 
         assert settings.response_format == {"type": "json_object"}
-        assert settings.structured_json_response is False
 
 
 class TestNvidiaEmbeddingPromptExecutionSettings:
diff --git a/python/tests/unit/connectors/ai/nvidia/services/test_nvidia_handler.py b/python/tests/unit/connectors/ai/nvidia/services/test_nvidia_handler.py
index b83aad461aa1..f6a0bfefd247 100644
--- a/python/tests/unit/connectors/ai/nvidia/services/test_nvidia_handler.py
+++ b/python/tests/unit/connectors/ai/nvidia/services/test_nvidia_handler.py
@@ -141,8 +141,8 @@ async def test_send_request_unsupported_model_type(self, mock_openai_client):
             ai_model_type=NvidiaModelTypes.CHAT,
         )
         # Manually set the attribute to bypass Pydantic validation
-        object.__setattr__(handler, 'ai_model_type', "UNSUPPORTED")
-        
+        object.__setattr__(handler, "ai_model_type", "UNSUPPORTED")
+
         settings = NvidiaChatPromptExecutionSettings(
             messages=[{"role": "user", "content": "Hello"}],
             model="test-model",
         )

From 7165f6b61a82d53f69fcc0e36eb4218b4f9085ac Mon Sep 17 00:00:00 2001
From: Soumili Nandi
Date: Mon, 15 Sep 2025 08:39:53 -0700
Subject: [PATCH 11/11] Fix mypy type checking errors and make handler type annotations consistent

---
 .../connectors/ai/nvidia/services/nvidia_handler.py | 13 +++++++------
 .../ai/nvidia/services/nvidia_text_embedding.py | 5 ++++-
 2 files changed, 11 insertions(+), 7 deletions(-)

diff --git a/python/semantic_kernel/connectors/ai/nvidia/services/nvidia_handler.py b/python/semantic_kernel/connectors/ai/nvidia/services/nvidia_handler.py
index 3a3bcf182e57..0f7efff01703 100644
--- a/python/semantic_kernel/connectors/ai/nvidia/services/nvidia_handler.py
+++ b/python/semantic_kernel/connectors/ai/nvidia/services/nvidia_handler.py
@@ -10,8 +10,9 @@
 from openai.types.completion import Completion
 from openai.types.create_embedding_response import CreateEmbeddingResponse
 
-from semantic_kernel.connectors.ai.nvidia import (
-    NvidiaPromptExecutionSettings,
+from semantic_kernel.connectors.ai.nvidia.prompt_execution_settings.nvidia_prompt_execution_settings import (
+    NvidiaChatPromptExecutionSettings,
+    NvidiaEmbeddingPromptExecutionSettings,
 )
 from semantic_kernel.connectors.ai.nvidia.services.nvidia_model_types import NvidiaModelTypes
 from semantic_kernel.connectors.ai.prompt_execution_settings import PromptExecutionSettings
@@ -37,15 +38,15 @@ class NvidiaHandler(KernelBaseModel, ABC):
     async def _send_request(self, settings: PromptExecutionSettings) -> RESPONSE_TYPE:
         """Send a request to the Nvidia API."""
         if self.ai_model_type == NvidiaModelTypes.EMBEDDING:
-            assert isinstance(settings, NvidiaPromptExecutionSettings)  # nosec
+            assert isinstance(settings, NvidiaEmbeddingPromptExecutionSettings)  # nosec
             return await self._send_embedding_request(settings)
         if self.ai_model_type == NvidiaModelTypes.CHAT:
-            assert isinstance(settings, NvidiaPromptExecutionSettings)  # nosec
+            assert isinstance(settings, NvidiaChatPromptExecutionSettings)  # nosec
             return await self._send_chat_completion_request(settings)
 
         raise NotImplementedError(f"Model type {self.ai_model_type} is not supported")
 
-    async def _send_embedding_request(self, settings: NvidiaPromptExecutionSettings) -> list[Any]:
+    async def _send_embedding_request(self, settings: NvidiaEmbeddingPromptExecutionSettings) -> list[Any]:
         """Send a request to the OpenAI embeddings endpoint."""
         try:
             # unsupported parameters are internally excluded from main dict and added to extra_body
@@ -60,7 +61,7 @@ async def _send_embedding_request(self, settings: NvidiaPromptExecutionSettings)
             ) from ex
 
     async def _send_chat_completion_request(
-        self, settings: NvidiaPromptExecutionSettings
+        self, settings: NvidiaChatPromptExecutionSettings
     ) -> ChatCompletion | AsyncStream[Any]:
         """Send a request to the NVIDIA chat completion endpoint."""
         try:
diff --git a/python/semantic_kernel/connectors/ai/nvidia/services/nvidia_text_embedding.py b/python/semantic_kernel/connectors/ai/nvidia/services/nvidia_text_embedding.py
index dddf32b00971..aa0da4d51859 100644
--- a/python/semantic_kernel/connectors/ai/nvidia/services/nvidia_text_embedding.py
+++ b/python/semantic_kernel/connectors/ai/nvidia/services/nvidia_text_embedding.py
@@ -73,7 +73,10 @@ def __init__(
         if not nvidia_settings.api_key:
             logger.warning("API_KEY is missing, inference may fail.")
         if not client:
-            client = AsyncOpenAI(api_key=nvidia_settings.api_key.get_secret_value(), base_url=nvidia_settings.base_url)
+            client = AsyncOpenAI(
+                api_key=nvidia_settings.api_key.get_secret_value() if nvidia_settings.api_key else None,
+                base_url=nvidia_settings.base_url,
+            )
         super().__init__(
             ai_model_id=nvidia_settings.embedding_model_id,
             api_key=nvidia_settings.api_key.get_secret_value() if nvidia_settings.api_key else None,
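With the series applied end to end, a compact usage sketch of the connector follows. This is an editor's illustration, not code from the patches: the endpoint URL and default model are the values shown above, the API key is a placeholder, and passing an explicit `AsyncOpenAI` client is the path that skips the `NVIDIA_API_KEY` requirement enforced in `__init__`.

```python
# Sketch: wiring NvidiaChatCompletion with an explicit AsyncOpenAI client.
import asyncio

from openai import AsyncOpenAI

from semantic_kernel.connectors.ai.nvidia import (
    NvidiaChatCompletion,
    NvidiaChatPromptExecutionSettings,
)
from semantic_kernel.contents import ChatHistory


async def main() -> None:
    client = AsyncOpenAI(
        api_key="nvapi-...",  # placeholder key
        base_url="https://integrate.api.nvidia.com/v1",
    )
    service = NvidiaChatCompletion(
        ai_model_id="meta/llama-3.1-8b-instruct",
        client=client,
    )

    history = ChatHistory()
    history.add_user_message("Hello, how are you?")
    settings = NvidiaChatPromptExecutionSettings(max_tokens=64)

    result = await service.get_chat_message_contents(history, settings)
    print(result[0].content)


if __name__ == "__main__":
    asyncio.run(main())
```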