Commit 7db6237

Support to config the context pre-process by the yaml file (#396)
Signed-off-by: SimFG <[email protected]>
1 parent 03a2787 commit 7db6237

File tree

12 files changed: +284 -39 lines changed
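
At a glance: after this commit, `init_similar_cache_from_config` can wire a context pre-processor in straight from the YAML file. A minimal config sketch, assuming the `pre_context_function`/`pre_context_config` keys read by `gptcache/adapter/api.py` below (the `target_length` value is illustrative):

```yaml
embedding:
  onnx
storage_config:
  data_dir:
    gptcache_data
  manager:
    sqlite,faiss
evaluation:
  distance
# New in this commit: one of summarization / selective / concat
pre_context_function:
  summarization
pre_context_config:
  target_length: 512
post_function:
  first
```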

cache_config_template.yml

Lines changed: 3 additions & 3 deletions

@@ -1,9 +1,9 @@
 # For `model_src`, `evaluation`, `post_function`, `pre_function`,
 # `storage_config` options, Check README for more.
 
-model_src:
+embedding:
   onnx
-model_config:
+embedding_config:
   # Set model kws here including `model`, `api_key` if needed
 storage_config:
   data_dir:
@@ -14,7 +14,7 @@ storage_config:
     # Set vector storage related params here
 evaluation:
   distance
-evaluation_kws:
+evaluation_config:
   # Set evaluation metric kws here
 pre_function:
   get_prompt

docs/release_note.md

Lines changed: 91 additions & 0 deletions

@@ -5,6 +5,97 @@ To read the following content, you need to understand the basic use of GPTCache,
 - [Readme doc](https://github.com/zilliztech/GPTCache)
 - [Usage doc](https://github.com/zilliztech/GPTCache/blob/main/docs/usage.md)
 
+## v0.1.28 (2023.5.29)
+To handle a large prompt, there are currently three options available:
+
+1. Increase the column size of CacheStorage.
+
+```python
+from gptcache.manager import manager_factory
+
+data_manager = manager_factory(
+    "sqlite,faiss", scalar_params={"table_len_config": {"question_question": 5000}}
+)
+```
+
+More details:
+- `question_question`: the question column size in the question table, defaults to 3000.
+- `answer_answer`: the answer column size in the answer table, defaults to 3000.
+- `session_id`: the session id column size in the session table, defaults to 1000.
+- `dep_name`: the name column size in the dep table, defaults to 1000.
+- `dep_data`: the data column size in the dep table, defaults to 3000.
+
+2. When using a template, use the dynamic value in the template as the cache key instead of using the entire template as the key.
+
+- **str template**
+```python
+from gptcache import Config
+from gptcache.processor.pre import last_content_without_template
+
+template_obj = "tell me a joke about {subject}"
+prompt = template_obj.format(subject="animal")
+value = last_content_without_template(
+    data={"messages": [{"content": prompt}]}, cache_config=Config(template=template_obj)
+)
+print(value)
+# ['animal']
+```
+
+- **langchain prompt template**
+
+```python
+from langchain import PromptTemplate
+
+from gptcache import Config
+from gptcache.processor.pre import last_content_without_template
+
+template_obj = PromptTemplate.from_template("tell me a joke about {subject}")
+prompt = template_obj.format(subject="animal")
+
+value = last_content_without_template(
+    data={"messages": [{"content": prompt}]},
+    cache_config=Config(template=template_obj.template),
+)
+print(value)
+# ['animal']
+```
+
+3. Wrap the openai object, reference: [BaseCacheLLM](https://gptcache.readthedocs.io/en/dev/references/adapter.html#module-gptcache.adapter.base)
+
+```python
+import random
+
+from gptcache import Cache
+from gptcache.adapter import openai
+from gptcache.adapter.api import init_similar_cache
+from gptcache.processor.pre import last_content
+
+cache_obj = Cache()
+init_similar_cache(
+    data_dir=str(random.random()), pre_func=last_content, cache_obj=cache_obj
+)
+
+
+def proxy_openai_chat_complete(*args, **kwargs):
+    import openai as real_openai
+
+    return real_openai.ChatCompletion.create(*args, **kwargs)
+
+
+openai.ChatCompletion.llm = proxy_openai_chat_complete
+openai.ChatCompletion.cache_args = {"cache_obj": cache_obj}
+
+openai.ChatCompletion.create(
+    model="gpt-3.5-turbo",
+    messages=[
+        {"role": "system", "content": "You are a helpful assistant."},
+        {"role": "user", "content": "What's GitHub"},
+    ],
+)
+```
+
 ## v0.1.27 (2023.5.25)
 1. Support the uform embedding, which can be used for **bilingual** (english + chinese) text

examples/README.md

Lines changed: 12 additions & 12 deletions

@@ -1,9 +1,9 @@
 # Example
 
 - [How to run Visual Question Answering with MiniGPT-4](#How-to-run-Visual-Question-Answering-with-MiniGPT-4)
-- [How to set the `embedding` function](#How-to-set-the-embedding-function)
-- [How to set the `data manager` class](#How-to-set-the-data-manager-class)
-- [How to set the `similarity evaluation` interface](#How-to-set-the-similarity-evaluation-interface)
+- [How to set the **embedding** function](#How-to-set-the-embedding-function)
+- [How to set the **data manager** class](#How-to-set-the-data-manager-class)
+- [How to set the **similarity evaluation** interface](#How-to-set-the-similarity-evaluation-interface)
 - [Other cache init params](#Other-cache-init-params)
 - [How to run with session](#How-to-run-with-session)
 - [How to use GPTCache server](#How-to-use-GPTCache-server)
@@ -572,10 +572,10 @@ The args are optional:
 You can configure the server via a YAML file; here is an example config yaml:
 
 ```yaml
-model_src:
+embedding:
   onnx
-model_config:
-  # Set model kws here including `model`, `api_key` if needed
+embedding_config:
+  # Set embedding model params here
 storage_config:
   data_dir:
     gptcache_data
@@ -585,7 +585,7 @@ storage_config:
   # Set vector storage related params here
 evaluation:
   distance
-evaluation_kws:
+evaluation_config:
   # Set evaluation metric kws here
 pre_function:
   get_prompt
@@ -595,15 +595,15 @@ config:
   similarity_threshold: 0.8
   # Set other config here
 ```
-- model_source: The model source.
-- model_config: The model name, model config, api key.
+- embedding: The embedding model source, options: [How to set the **embedding** function](#How-to-set-the-embedding-function)
+- embedding_config: The embedding model config, details: [Embedding Reference](https://gptcache.readthedocs.io/en/latest/references/embedding.html)
 - data_dir: The cache directory.
 - manager: The cache storage and vector storage.
-- evaluation: The evaluation storage.
+- evaluation: The evaluation component, options: [How to set the **similarity evaluation** interface](#How-to-set-the-similarity-evaluation-interface)
+- evaluation_config: The evaluation config, options: [Similarity Evaluation Reference](https://gptcache.readthedocs.io/en/latest/references/similarity_evaluation.html)
 - pre_function: The pre-processing function.
 - post_function: The post-processing function.
-
-For `model_src`, `evaluation`, `storage_config` options, check [README.md](https://github.com/zilliztech/gpt-cache/tree/main/examples) for more.
+- config: The cache config, like `similarity_threshold`
 
 **Use the docker to start the GPTCache server**
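
A usage sketch for the config above. `init_similar_cache_from_config` is the function this commit extends; the file name `gptcache_server_config.yml` is illustrative, and an OpenAI API key is assumed to be set in the environment:

```python
from gptcache.adapter import openai
from gptcache.adapter.api import init_similar_cache_from_config

# Builds the embedding model, data manager, evaluation strategy and
# pre/post functions from the YAML file; unset keys fall back to the
# defaults listed above (onnx, "sqlite,faiss", distance, ...).
init_similar_cache_from_config(config_dir="gptcache_server_config.yml")

response = openai.ChatCompletion.create(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "What's GitHub?"}],
)
```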

examples/context_process/selective_context.py

Lines changed: 1 addition & 1 deletion

@@ -5,7 +5,7 @@
 from gptcache.adapter import openai
 from gptcache.embedding import Onnx
 from gptcache.manager import manager_factory
-from gptcache.processor.context.selective_context import SelectiveContextProcess
+from gptcache.processor.context import SelectiveContextProcess
 from gptcache.similarity_evaluation import SearchDistanceEvaluation
 from gptcache.utils import import_selective_context

examples/context_process/summarization_context.py

Lines changed: 1 addition & 1 deletion

@@ -5,7 +5,7 @@
 from gptcache.adapter import openai
 from gptcache.embedding import Onnx
 from gptcache.manager import manager_factory
-from gptcache.processor.context.summarization_context import SummarizationContextProcess
+from gptcache.processor.context import SummarizationContextProcess
 from gptcache.similarity_evaluation.distance import SearchDistanceEvaluation
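
Both example scripts follow the same pattern; here is a sketch of how such a processor is presumably wired into the cache (the surrounding code is reconstructed from typical GPTCache examples, not from this diff): the processor's bound `pre_process` method becomes the cache's pre-embedding function.

```python
from gptcache import cache
from gptcache.embedding import Onnx
from gptcache.manager import manager_factory
from gptcache.processor.context import SummarizationContextProcess
from gptcache.similarity_evaluation import SearchDistanceEvaluation

onnx = Onnx()
data_manager = manager_factory(
    "sqlite,faiss", vector_params={"dimension": onnx.dimension}
)
# Downloads facebook/bart-large-cnn on first use.
context_process = SummarizationContextProcess()
cache.init(
    pre_embedding_func=context_process.pre_process,
    embedding_func=onnx.to_embeddings,
    data_manager=data_manager,
    similarity_evaluation=SearchDistanceEvaluation(),
)
```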

gptcache/adapter/api.py

Lines changed: 41 additions & 9 deletions

@@ -21,6 +21,11 @@
 from gptcache.embedding.base import BaseEmbedding
 from gptcache.manager import manager_factory
 from gptcache.manager.data_manager import DataManager
+from gptcache.processor.context import (
+    SummarizationContextProcess,
+    SelectiveContextProcess,
+    ConcatContextProcess,
+)
 from gptcache.processor.post import temperature_softmax
 from gptcache.processor.pre import get_prompt
 from gptcache.similarity_evaluation import (
@@ -192,9 +197,15 @@ def init_similar_cache_from_config(config_dir: str, cache_obj: Optional[Cache] =
     else:
         init_conf = {}
 
-    model_src = init_conf.get("model_source", "onnx")
-    model_config = init_conf.get("model_config", {})
-    embedding_model = _get_model(model_src, model_config)
+    # The old `model_source` key is still read first for backward compatibility
+    embedding = init_conf.get("model_source", "")
+    if not embedding:
+        embedding = init_conf.get("embedding", "onnx")
+    # ditto for `model_config`
+    embedding_config = init_conf.get("model_config", {})
+    if not embedding_config:
+        embedding_config = init_conf.get("embedding_config", {})
+    embedding_model = _get_model(embedding, embedding_config)
 
     storage_config = init_conf.get("storage_config", {})
     storage_config.setdefault("manager", "sqlite,faiss")
@@ -205,13 +216,23 @@ def init_similar_cache_from_config(config_dir: str, cache_obj: Optional[Cache] =
     data_manager = manager_factory(**storage_config)
 
     eval_strategy = init_conf.get("evaluation", "distance")
-    eval_kws = init_conf.get("evaluation_kws")
-    evaluation = _get_eval(eval_strategy, eval_kws)
+    # The old `evaluation_kws` key is still read first for backward compatibility
+    eval_config = init_conf.get("evaluation_kws", {})
+    if not eval_config:
+        eval_config = init_conf.get("evaluation_config", {})
+    evaluation = _get_eval(eval_strategy, eval_config)
 
     cache_obj = cache_obj if cache_obj else cache
 
-    pre_prcocess = init_conf.get("pre_function", "get_prompt")
-    pre_func = _get_pre_func(pre_prcocess)
+    pre_process = init_conf.get("pre_context_function")
+    if pre_process:
+        pre_func = _get_pre_context_function(
+            pre_process, init_conf.get("pre_context_config")
+        )
+        pre_func = pre_func.pre_process
+    else:
+        pre_process = init_conf.get("pre_function", "get_prompt")
+        pre_func = _get_pre_func(pre_process)
 
     post_process = init_conf.get("post_function", "first")
     post_func = _get_post_func(post_process)
@@ -273,8 +294,19 @@ def _get_eval(strategy, kws=None):
         return KReciprocalEvaluation(**kws)
 
 
-def _get_pre_func(pre_prcocess):
-    return getattr(gptcache.processor.pre, pre_prcocess)
+def _get_pre_func(pre_process):
+    return getattr(gptcache.processor.pre, pre_process)
+
+
+def _get_pre_context_function(pre_context_process, kws=None):
+    pre_context_process = pre_context_process.lower()
+    kws = kws or {}
+    if pre_context_process in "summarization":
+        return SummarizationContextProcess(**kws)
+    if pre_context_process in "selective":
+        return SelectiveContextProcess(**kws)
+    if pre_context_process in "concat":
+        return ConcatContextProcess()
 
 
 def _get_post_func(post_process):
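
Two behaviours worth noting in the code above, neither of which the diff states explicitly. First, when a config file carries both generations of a key, the legacy spelling wins: `model_source`/`model_config`/`evaluation_kws` are read first, and the new `embedding`/`embedding_config`/`evaluation_config` keys are consulted only when the legacy ones are absent or empty. A hypothetical mixed config:

```yaml
# `model_source` is read first, so this cache uses onnx, not
# huggingface (huggingface here is just a stand-in for any
# other supported embedding source).
model_source: onnx
embedding: huggingface
```

Second, `_get_pre_context_function` matches by substring (`"sum" in "summarization"` is true in Python), so abbreviated values happen to work, and it returns `None` for unrecognized ones.
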
gptcache/processor/context/__init__.py

Lines changed: 44 additions & 0 deletions

@@ -0,0 +1,44 @@
+from gptcache.utils.lazy_import import LazyImport
+
+summarization = LazyImport(
+    "summarization_context",
+    globals(),
+    "gptcache.processor.context.summarization_context",
+)
+selective = LazyImport(
+    "selective_context", globals(), "gptcache.processor.context.selective_context"
+)
+concat = LazyImport(
+    "concat_context", globals(), "gptcache.processor.context.concat_context"
+)
+
+
+__all__ = [
+    "SummarizationContextProcess",
+    "SelectiveContextProcess",
+    "ConcatContextProcess",
+]
+
+
+def SummarizationContextProcess(summarizer=None, tokenizer=None, target_length=512):
+    return summarization.SummarizationContextProcess(
+        summarizer, tokenizer, target_length
+    )
+
+
+def SelectiveContextProcess(
+    model_type: str = "gpt2",
+    lang: str = "en",
+    reduce_ratio: float = 0.35,
+    reduce_level: str = "phrase",
+):
+    return selective.SelectiveContextProcess(
+        model_type=model_type,
+        lang=lang,
+        reduce_ratio=reduce_ratio,
+        reduce_level=reduce_level,
+    )
+
+
+def ConcatContextProcess():
+    return concat.ConcatContextProcess()
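
For reference, a minimal sketch of using these lazy factories directly; the `data_dir` name is illustrative, and passing the processor's bound `pre_process` method mirrors what `init_similar_cache_from_config` now does in `gptcache/adapter/api.py`:

```python
from gptcache import Cache
from gptcache.adapter.api import init_similar_cache
from gptcache.processor.context import ConcatContextProcess

# ConcatContextProcess needs no extra models, so it is the cheapest to try.
context_process = ConcatContextProcess()

cache_obj = Cache()
init_similar_cache(
    data_dir="concat_cache_data",
    pre_func=context_process.pre_process,
    cache_obj=cache_obj,
)
```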

gptcache/processor/context/summarization_context.py

Lines changed: 2 additions & 0 deletions

@@ -31,6 +31,8 @@ class SummarizationContextProcess(ContextProcess):
     """
     def __init__(self, summarizer=transformers.pipeline("summarization", model="facebook/bart-large-cnn"),
                  tokenizer=None, target_length=512):
+        if not summarizer:
+            summarizer = transformers.pipeline("summarization", model="facebook/bart-large-cnn")
         self.summarizer = summarizer
         self.target_length = target_length
         if tokenizer is None:
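
The added guard makes `SummarizationContextProcess(summarizer=None)` build the default pipeline itself, which is what lets the lazy factory in `gptcache/processor/context/__init__.py` pass `summarizer=None` through. A sketch of the intended effect, assuming transformers is installed:

```python
from gptcache.processor.context import SummarizationContextProcess

# Thanks to LazyImport, importing gptcache.processor.context does not pull
# in transformers; the bart-large-cnn pipeline is only built on this call.
process = SummarizationContextProcess()  # summarizer=None triggers the new guard
```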
