- Add `MongoDBCache` and `MongoDBAtlasSemanticCache` classes to `@langchain/mongodb` for key-value and semantic LLM caching.
- Implement integration tests for both cache types.
- Update documentation:
  - Add a guide for using MongoDB as a key-value cache.
  - Add a guide for using MongoDB Atlas as a semantic cache.
  - Add additional information and clarification on semantic vs. key-value caching.
- Fix test issues:
  - Consistently use the `langchain_test` collection for all MongoDB tests.
  - Use the namespace for database naming in all MongoDB tests.
  - Improve the test error message when Docker is not available.
  - Always create a search index for the test vector store, since the test may otherwise fail under some setups.
**docs/core_docs/docs/how_to/chat_model_caching.mdx** (37 additions & 4 deletions)
@@ -17,13 +17,21 @@ LangChain provides an optional caching layer for chat models. This is useful for

It can save you money by reducing the number of API calls you make to the LLM provider, if you're often requesting the same completion multiple times.

It can speed up your application by reducing the number of API calls you make to the LLM provider.

There are two methods for caching LLM responses: **semantic caching** and **key-value caching**.

- **Key-value caching** stores responses based on exact query (LLM prompt) matches. When the same request is made again, the cached LLM response is retrieved. Key-value caching is fast, but its shortcoming is that even small changes in the prompt, such as punctuation differences or slight wording variations (e.g., _"yes"_ vs. _"yeah"_), can cause a cache miss, leading to a fresh LLM call.
- **[Semantic caching](/docs/integrations/semantic_caching)** improves upon key-value caching by relying on the meaning of the prompt rather than exact matches. If a new LLM prompt is semantically similar to a previously cached one, the stored response is retrieved and reused, reducing LLM usage costs. A typical implementation of semantic caching involves storing prompts as embeddings and using similarity search to identify a cache hit.

This page goes over key-value caching. To use semantic caching, see [LLM Semantic Caching](/docs/integrations/semantic_caching).

NOTE: The caching integrations in LangChain do not expire old or unused values. Based on your use case and application, decide whether you need an eviction process and, if so, what kind, and implement it directly against the underlying storage.

import CodeBlock from "@theme/CodeBlock";

```typescript
import { ChatOpenAI } from "@langchain/openai";

// To make the caching really obvious, let's use a slower model.
const model = new ChatOpenAI({
  model: "gpt-4",
  cache: true,
  // ...
});
```
@@ -122,13 +130,13 @@ import AdvancedUpstashRedisCacheExample from "@examples/cache/chat_models/upstas

## Caching with Vercel KV

LangChain provides a Vercel KV-based cache. Like the Redis-based cache, this cache is useful if you want to share the cache across multiple processes or servers. The Vercel KV client uses HTTP and supports edge environments. To use it, you'll need to install the `@vercel/kv` package:

```bash npm2yarn
npm install @vercel/kv
```

You'll also need a Vercel account and a [KV database](https://vercel.com/docs/storage/vercel-kv/kv-reference) to connect to. Once you've done that, retrieve your REST URL and REST token.

Then, you can pass a `cache` option when you instantiate the LLM. For example:
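The published page pulls its example in from the `@examples` directory; as a rough sketch of what passing a Vercel KV-backed `cache` looks like (assuming the `VercelKVCache` export from `@langchain/community/caches/vercel_kv` and its `{ client, ttl }` options, with placeholder credentials), it might be:

```typescript
import { ChatOpenAI } from "@langchain/openai";
import { VercelKVCache } from "@langchain/community/caches/vercel_kv";
import { createClient } from "@vercel/kv";

// Placeholder URL and token: use the REST URL and REST token from your KV database.
const cache = new VercelKVCache({
  client: createClient({
    url: "VERCEL_KV_REST_API_URL",
    token: "VERCEL_KV_REST_API_TOKEN",
  }),
  ttl: 3600, // optional time-to-live, in seconds
});

const model = new ChatOpenAI({ model: "gpt-4", cache });
```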
@@ -156,14 +164,39 @@ import CloudflareExample from "@examples/cache/chat_models/cloudflare_kv.ts";

NOTE: This section covers using MongoDB as a key-value LLM cache. For **semantic caching**, see [MongoDB Atlas Semantic Cache](/docs/integrations/semantic_caching/mongodb_atlas).

LangChain provides MongoDB-based cache support. This is especially useful if your application is already using MongoDB as a database and you don't want to add another data store integration.

To use this cache, you'll need to install `mongodb` as well as `@langchain/mongodb`:
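For example, with npm:

```bash npm2yarn
npm install mongodb @langchain/mongodb
```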
Hint: The key-value cache is stored in the collection using `prompt` as the key and `llm` as the value. You can speed up fetching cached entries by setting up an index (not a Vector Search index) on `prompt`.
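To make the wiring concrete, here is a minimal sketch of using the new `MongoDBCache` with a chat model. The constructor options are an assumption based on the PR description (the cache is backed by a MongoDB collection); check the package's exported types for the actual signature.

```typescript
import { MongoClient } from "mongodb";
import { MongoDBCache } from "@langchain/mongodb";
import { ChatOpenAI } from "@langchain/openai";

// Reuse your application's existing MongoDB connection.
const client = new MongoClient(process.env.MONGODB_URI ?? "");
const collection = client.db("langchain").collection("llm_cache");

// Assumed constructor shape: a config object holding the backing collection.
const cache = new MongoDBCache({ collection });

// Exact prompt repeats will now be served from MongoDB instead of the API.
const model = new ChatOpenAI({ model: "gpt-4", cache });
```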
**docs/core_docs/docs/how_to/llm_caching.mdx** (27 additions & 0 deletions)
@@ -4,6 +4,8 @@ sidebar_position: 2

# How to cache model responses

NOTE: This section covers older language models that take a string as input and return a string as output. You should almost always use the newer chat models instead, since most model providers have adopted a chat-like interface for interacting with language models. See [Chat Model Caching](/docs/how_to/chat_model_caching) for implementing caching, including semantic caching, with the newer chat models.

LangChain provides an optional caching layer for LLMs. This is useful for two reasons:

It can save you money by reducing the number of API calls you make to the LLM provider, if you're often requesting the same completion multiple times.
@@ -197,6 +199,31 @@ import CloudflareExample from "@examples/cache/cloudflare_kv.ts";

NOTE: This section covers using MongoDB as a key-value LLM cache. For **semantic caching**, see [MongoDB Atlas Semantic Cache](/docs/integrations/semantic_caching/mongodb_atlas).

LangChain provides MongoDB-based cache support. This is especially useful if your application is already using MongoDB as a database and you don't want to add another data store integration.

To use this cache, you'll need to install `mongodb` as well as `@langchain/mongodb`:

Hint: The key-value cache is stored in the collection using `prompt` as the key and `llm` as the value. You can speed up fetching cached entries by setting up an index (not a Vector Search index) on `prompt`.
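As a concrete illustration of the hint, a regular ascending index on `prompt` can be created with the MongoDB driver; the database and collection names below are placeholders:

```typescript
import { MongoClient } from "mongodb";

const client = new MongoClient(process.env.MONGODB_URI ?? "");

// A plain index (not an Atlas Vector Search index) on the cache's key field.
await client
  .db("langchain")
  .collection("llm_cache")
  .createIndex({ prompt: 1 });
```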
[Caching LLM calls](/docs/how_to/chat_model_caching) can be useful for testing, cost savings, and speed.

## Caching LLM Responses: Semantic vs. Key-Value Caching

Currently, there are two methods for caching LLM responses: **semantic caching** and **key-value caching**.

_Key-value LLM caching guide:_ see the [LLM Cache how-to guide](/docs/how_to/llm_caching).

### Key-Value Caching

Key-value caching stores responses based on exact query (LLM prompt) matches. When the same request is made again, the cached LLM response is retrieved. Key-value caching is fast, but its shortcoming is that even small changes in the prompt, such as punctuation differences or slight wording variations (e.g., _"yes"_ vs. _"yeah"_), can cause a cache miss, leading to a fresh LLM call.

### Semantic Caching

Semantic caching improves upon this by relying on the meaning of the prompt rather than exact matches. If a new LLM prompt is semantically similar to a previously cached one, the stored response is retrieved and reused, reducing LLM usage costs. A typical implementation of semantic caching involves storing prompts as embeddings and using similarity search to identify a cache hit.
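To make the mechanics concrete, here is a minimal, framework-free sketch of the idea (not the LangChain API): prompts are embedded, and a cosine-similarity search over previously stored embeddings decides whether a cached response can be reused.

```typescript
type CacheEntry = { embedding: number[]; response: string };

// Cosine similarity between two embedding vectors of equal length.
const cosine = (a: number[], b: number[]): number => {
  let dot = 0;
  let normA = 0;
  let normB = 0;
  for (let i = 0; i < a.length; i++) {
    dot += a[i] * b[i];
    normA += a[i] * a[i];
    normB += b[i] * b[i];
  }
  return dot / (Math.sqrt(normA) * Math.sqrt(normB));
};

// Returns a cached response if any stored prompt is similar enough,
// otherwise undefined (meaning a fresh LLM call is needed).
function semanticLookup(
  queryEmbedding: number[],
  cache: CacheEntry[],
  threshold = 0.9
): string | undefined {
  let best: CacheEntry | undefined;
  let bestScore = -1;
  for (const entry of cache) {
    const score = cosine(queryEmbedding, entry.embedding);
    if (score > bestScore) {
      best = entry;
      bestScore = score;
    }
  }
  return bestScore >= threshold ? best?.response : undefined;
}
```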
This page documents the MongoDB Atlas integration for **semantic caching** of LLM generation outputs. See [MongoDB Atlas](/docs/integrations/vectorstores/mongodb_atlas) for additional setup and configuration information.

Semantic caching allows you to cache and retrieve generations based on vector similarity, so that similar prompts can share cached results.

## Install dependencies

You'll first need to install [`@langchain/mongodb`](https://www.npmjs.com/package/@langchain/mongodb) as well as [`mongodb`](https://www.npmjs.com/package/mongodb):
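For example, with npm:

```bash npm2yarn
npm install @langchain/mongodb mongodb
```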
You will need the `mongodb` driver package to manage your database, collection(s), and vector search indexes. The `@langchain/mongodb` package provides the integration for LangChain and expects a ready-to-use collection and vector search index.

You can set up a vector collection either through the MongoDB Atlas UI or with commands such as the following:
```typescript
import { MongoClient } from "mongodb";

// Connect and get a handle on the collection that will hold the cached
// entries and their embeddings (connection string and names are placeholders).
const client = new MongoClient(process.env.MONGODB_ATLAS_URI ?? "");
const collection = client.db("langchain").collection("semantic_cache");

// Create a search index. The dimensions must match your embedding dimensions.
await collection.createSearchIndex({
  name: "default",
  definition: {
    mappings: {
      dynamic: true,
      fields: {
        embedding: {
          dimensions: 1024,
          similarity: "cosine",
          type: "knnVector",
        },
      },
    },
  },
});
```
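Once the collection and index exist, the semantic cache can be passed to a chat model via the `cache` option. The constructor shape below is an assumption (modeled on other Atlas integrations that take an embeddings instance plus a collection and index name); check the exports of `@langchain/mongodb` for the actual signature.

```typescript
import { MongoClient } from "mongodb";
import { MongoDBAtlasSemanticCache } from "@langchain/mongodb";
import { ChatOpenAI, OpenAIEmbeddings } from "@langchain/openai";

const client = new MongoClient(process.env.MONGODB_ATLAS_URI ?? "");
const collection = client.db("langchain").collection("semantic_cache");

// Assumed constructor: embeddings plus a config pointing at the prepared
// collection and vector search index. Verify against the package's types.
const cache = new MongoDBAtlasSemanticCache(new OpenAIEmbeddings(), {
  collection,
  indexName: "default",
});

const model = new ChatOpenAI({ model: "gpt-4", cache });

// Semantically similar prompts should hit the cache rather than the API.
await model.invoke("What is the capital of France?");
await model.invoke("Tell me the capital city of France.");
```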
Note that the initial creation of a vector search index takes some time (it may take more than 30 seconds). If you query the vector index while it is initializing, you may receive an error or an empty response. Also, each time a new document (vector embedding, etc.) is added, the index needs to update before it can return the new document as part of its response. You can query the vector index while it is being updated, but it will return data based on the old index.
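If you want to avoid querying too early, you can poll the index status before first use. Here is a sketch using the Node driver's `listSearchIndexes()`, reusing the `collection` handle from the snippet above; the `queryable` field follows Atlas's `$listSearchIndexes` output, so verify it for your driver version.

```typescript
// Poll until the Atlas search index reports that it is queryable.
async function waitForSearchIndex(name = "default"): Promise<void> {
  for (;;) {
    const [index] = await collection.listSearchIndexes(name).toArray();
    if (index?.queryable) return;
    // Not ready yet: wait a few seconds before checking again.
    await new Promise((resolve) => setTimeout(resolve, 5000));
  }
}

await waitForSearchIndex();
```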