Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion python/samples/concepts/setup/ALL_SETTINGS.md
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,8 @@
| | [VertexAITextEmbedding](../../../semantic_kernel/connectors/ai/google/google_ai/services/google_ai_text_embedding.py) | project_id, <br> region, <br> embedding_model_id | VERTEX_AI_PROJECT_ID, <br> VERTEX_AI_REGION, <br> VERTEX_AI_EMBEDDING_MODEL_ID | Yes, <br> No, <br> Yes | |
| HuggingFace | [HuggingFaceTextCompletion](../../../semantic_kernel/connectors/ai/hugging_face/services/hf_text_completion.py) | ai_model_id | N/A | Yes | |
| | [HuggingFaceTextEmbedding](../../../semantic_kernel/connectors/ai/hugging_face/services/hf_text_embedding.py) | ai_model_id | N/A | Yes | |
| NVIDIA NIM | [NvidiaTextEmbedding](../../../semantic_kernel/connectors/ai/nvidia/services/nvidia_text_embedding.py) | ai_model_id, <br> api_key, <br> base_url | NVIDIA_API_KEY, <br> NVIDIA_TEXT_EMBEDDING_MODEL_ID, <br> NVIDIA_BASE_URL | Yes | [NvidiaAISettings](../../../semantic_kernel/connectors/ai/nvidia/settings/nvidia_settings.py) |
| NVIDIA NIM | [NvidiaChatCompletion](../../../semantic_kernel/connectors/ai/nvidia/services/nvidia_chat_completion.py) | ai_model_id, <br> api_key, <br> base_url | NVIDIA_CHAT_MODEL_ID, <br> NVIDIA_API_KEY, <br> NVIDIA_BASE_URL | Yes (default: meta/llama-3.1-8b-instruct), <br> Yes, <br> No | [NvidiaAISettings](../../../semantic_kernel/connectors/ai/nvidia/settings/nvidia_settings.py) |
| | [NvidiaTextEmbedding](../../../semantic_kernel/connectors/ai/nvidia/services/nvidia_text_embedding.py) | ai_model_id, <br> api_key, <br> base_url | NVIDIA_TEXT_EMBEDDING_MODEL_ID, <br> NVIDIA_API_KEY, <br> NVIDIA_BASE_URL | Yes | [NvidiaAISettings](../../../semantic_kernel/connectors/ai/nvidia/settings/nvidia_settings.py) |
| Mistral AI | [MistralAIChatCompletion](../../../semantic_kernel/connectors/ai/mistral_ai/services/mistral_ai_chat_completion.py) | ai_model_id, <br> api_key | MISTRALAI_CHAT_MODEL_ID, <br> MISTRALAI_API_KEY | Yes, <br> Yes | [MistralAISettings](../../../semantic_kernel/connectors/ai/mistral_ai/settings/mistral_ai_settings.py) |
| | [MistralAITextEmbedding](../../../semantic_kernel/connectors/ai/mistral_ai/services/mistral_ai_text_embedding.py) | ai_model_id, <br> api_key | MISTRALAI_EMBEDDING_MODEL_ID, <br> MISTRALAI_API_KEY | Yes, <br> Yes | |
| Ollama | [OllamaChatCompletion](../../../semantic_kernel/connectors/ai/ollama/services/ollama_chat_completion.py) | ai_model_id, <br> host | OLLAMA_CHAT_MODEL_ID, <br> OLLAMA_HOST | Yes, <br> No | [OllamaSettings](../../../semantic_kernel/connectors/ai/ollama/ollama_settings.py) |
Expand Down
26 changes: 26 additions & 0 deletions python/samples/concepts/setup/chat_completion_services.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ class Services(str, Enum):
ONNX = "onnx"
VERTEX_AI = "vertex_ai"
DEEPSEEK = "deepseek"
NVIDIA = "nvidia"


service_id = "default"
Expand Down Expand Up @@ -64,6 +65,7 @@ def get_chat_completion_service_and_request_settings(
Services.ONNX: lambda: get_onnx_chat_completion_service_and_request_settings(),
Services.VERTEX_AI: lambda: get_vertex_ai_chat_completion_service_and_request_settings(),
Services.DEEPSEEK: lambda: get_deepseek_chat_completion_service_and_request_settings(),
Services.NVIDIA: lambda: get_nvidia_chat_completion_service_and_request_settings(),
}

# Call the appropriate lambda or function based on the service name
Expand Down Expand Up @@ -414,3 +416,27 @@ def get_deepseek_chat_completion_service_and_request_settings() -> tuple[
request_settings = OpenAIChatPromptExecutionSettings(service_id=service_id)

return chat_service, request_settings


def get_nvidia_chat_completion_service_and_request_settings() -> tuple[
    "ChatCompletionClientBase", "PromptExecutionSettings"
]:
    """Create an NVIDIA NIM chat completion service together with its request settings.

    Service credentials are resolved in three ways:
    1. Passed directly to the constructor
    2. Read from environment variables
    3. Loaded from an environment (.env) file

    The request settings control the behavior of the service. The default settings are sufficient to get started.
    However, you can adjust the settings to suit your needs.
    Note: Some of the settings are NOT meant to be set by the user.
    Please refer to the Semantic Kernel Python documentation for more information:
    https://learn.microsoft.com/en-us/python/api/semantic-kernel/semantic_kernel?view=semantic-kernel-python
    """
    # Imported lazily so the NVIDIA connector is only required when this service is selected.
    from semantic_kernel.connectors.ai.nvidia import NvidiaChatCompletion, NvidiaChatPromptExecutionSettings

    # Both objects share the module-level service_id so they are matched up by the kernel.
    service = NvidiaChatCompletion(service_id=service_id)
    settings = NvidiaChatPromptExecutionSettings(service_id=service_id)

    return service, settings
36 changes: 35 additions & 1 deletion python/semantic_kernel/connectors/ai/nvidia/README.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# semantic_kernel.connectors.ai.nvidia

This connector enables integration with NVIDIA NIM API for text embeddings. It allows you to use NVIDIA's embedding models within the Semantic Kernel framework.
This connector enables integration with the NVIDIA NIM API for text embeddings and chat completion. It allows you to use NVIDIA's models within the Semantic Kernel framework.

## Quick start

Expand All @@ -13,6 +13,8 @@ kernel = sk.Kernel()
### Add NVIDIA text embedding service
You can provide your API key directly or through environment variables
```python
from semantic_kernel.connectors.ai.nvidia import NvidiaTextEmbedding

embedding_service = NvidiaTextEmbedding(
ai_model_id="nvidia/nv-embedqa-e5-v5", # Default model if not specified
api_key="your-nvidia-api-key", # Can also use NVIDIA_API_KEY env variable
Expand All @@ -30,3 +32,35 @@ kernel.add_service(embedding_service)
texts = ["Hello, world!", "Semantic Kernel is awesome"]
embeddings = await kernel.get_service("nvidia-embeddings").generate_embeddings(texts)
```

### Add NVIDIA chat completion service
```python
from semantic_kernel.connectors.ai.nvidia import NvidiaChatCompletion

chat_service = NvidiaChatCompletion(
ai_model_id="meta/llama-3.1-8b-instruct", # Default model if not specified
api_key="your-nvidia-api-key", # Can also use NVIDIA_API_KEY env variable
service_id="nvidia-chat" # Optional service identifier
)
kernel.add_service(chat_service)
```

### Basic chat completion
```python
response = await kernel.invoke_prompt("Hello, how are you?")
```

### Using with Chat Completion Agent
```python
from semantic_kernel.agents import ChatCompletionAgent
from semantic_kernel.connectors.ai.nvidia import NvidiaChatCompletion

agent = ChatCompletionAgent(
service=NvidiaChatCompletion(),
name="SK-Assistant",
instructions="You are a helpful assistant.",
)
response = await agent.get_response(messages="Write a haiku about Semantic Kernel.")
print(response.content)
```

4 changes: 4 additions & 0 deletions python/semantic_kernel/connectors/ai/nvidia/__init__.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,17 @@
# Copyright (c) Microsoft. All rights reserved.

from semantic_kernel.connectors.ai.nvidia.prompt_execution_settings.nvidia_prompt_execution_settings import (
NvidiaChatPromptExecutionSettings,
NvidiaEmbeddingPromptExecutionSettings,
NvidiaPromptExecutionSettings,
)
from semantic_kernel.connectors.ai.nvidia.services.nvidia_chat_completion import NvidiaChatCompletion
from semantic_kernel.connectors.ai.nvidia.services.nvidia_text_embedding import NvidiaTextEmbedding
from semantic_kernel.connectors.ai.nvidia.settings.nvidia_settings import NvidiaSettings

__all__ = [
"NvidiaChatCompletion",
"NvidiaChatPromptExecutionSettings",
"NvidiaEmbeddingPromptExecutionSettings",
"NvidiaPromptExecutionSettings",
"NvidiaSettings",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

from typing import Annotated, Any, Literal

from pydantic import Field
from pydantic import BaseModel, Field

from semantic_kernel.connectors.ai.prompt_execution_settings import PromptExecutionSettings

Expand All @@ -13,18 +13,6 @@ class NvidiaPromptExecutionSettings(PromptExecutionSettings):
format: Literal["json"] | None = None
options: dict[str, Any] | None = None

def prepare_settings_dict(self, **kwargs) -> dict[str, Any]:
"""Prepare the settings as a dictionary for sending to the AI service.

By default, this method excludes the service_id and extension_data fields.
As well as any fields that are None.
"""
return self.model_dump(
exclude={"service_id", "extension_data", "structured_json_response", "input_type", "truncate"},
exclude_none=True,
by_alias=True,
)


class NvidiaEmbeddingPromptExecutionSettings(NvidiaPromptExecutionSettings):
"""Settings for NVIDIA embedding prompt execution."""
Expand All @@ -39,3 +27,47 @@ class NvidiaEmbeddingPromptExecutionSettings(NvidiaPromptExecutionSettings):
extra_body: dict | None = None
timeout: float | None = None
dimensions: Annotated[int | None, Field(gt=0)] = None

    def prepare_settings_dict(self, **kwargs) -> dict[str, Any]:
        """Prepare the embedding settings as a dictionary for sending to the AI service.

        Excludes SK bookkeeping fields (service_id, extension_data,
        structured_json_response) plus the embedding-only input_type and
        truncate fields — presumably those are forwarded by the connector
        through another channel rather than in this payload; confirm against
        NvidiaTextEmbedding. Fields that are None are dropped, and pydantic
        serialization aliases are applied (by_alias=True).
        """
        return self.model_dump(
            exclude={"service_id", "extension_data", "structured_json_response", "input_type", "truncate"},
            exclude_none=True,
            by_alias=True,
        )


class NvidiaChatPromptExecutionSettings(NvidiaPromptExecutionSettings):
    """Settings for NVIDIA chat prompt execution.

    Mirrors the OpenAI-compatible chat completion parameters accepted by
    NVIDIA NIM endpoints. All fields are optional; only non-None values are
    serialized into the request (see prepare_settings_dict).
    """

    # Chat history; each entry is a role/content mapping.
    messages: list[dict[str, str]] | None = None
    # Serialized as "model" in the outgoing request via the alias below.
    ai_model_id: Annotated[str | None, Field(serialization_alias="model")] = None
    temperature: float | None = None
    top_p: float | None = None
    n: int | None = None
    stream: bool = False
    stop: str | list[str] | None = None
    max_tokens: int | None = None
    presence_penalty: float | None = None
    frequency_penalty: float | None = None
    logit_bias: dict[str, float] | None = None
    user: str | None = None
    tools: list[dict[str, Any]] | None = None
    tool_choice: str | dict[str, Any] | None = None
    # May be a plain dict (e.g. {"type": "json_object"}) or a pydantic model
    # type used for structured output.
    response_format: (
        dict[Literal["type"], Literal["text", "json_object"]] | dict[str, Any] | type[BaseModel] | type | None
    ) = None
    seed: int | None = None
    extra_headers: dict | None = None
    extra_body: dict | None = None
    timeout: float | None = None
    # NVIDIA-specific structured output support
    nvext: dict[str, Any] | None = None

    def prepare_settings_dict(self, **kwargs) -> dict[str, Any]:
        """Prepare the chat settings as a dictionary for sending to the AI service.

        Excludes SK bookkeeping fields (service_id, extension_data,
        structured_json_response) and response_format — presumably
        response_format is translated separately (e.g. into nvext) by the
        connector; confirm against NvidiaChatCompletion. Fields that are None
        are dropped, and serialization aliases are applied (by_alias=True),
        so ai_model_id is emitted as "model".
        """
        return self.model_dump(
            exclude={"service_id", "extension_data", "structured_json_response", "response_format"},
            exclude_none=True,
            by_alias=True,
        )
Loading
Loading