@@ -1,4 +1,5 @@
 import re
+import string
 from typing import Dict, Any
 
 
@@ -47,19 +48,116 @@ def last_content_without_prompt(data: Dict[str, Any], **params: Dict[str, Any])
     return new_content_str
 
 
+def _get_pattern_value(pattern_str: str, value_str: str):
+    # split the template into its literal segments and field names
+    literal_text_arr = []
+    field_name_arr = []
+    for literal_text, field_name, _, _ in string.Formatter().parse(pattern_str):
+        literal_text_arr.append(literal_text)
+        if field_name is not None:
+            # unnamed fields ("{}") are keyed by their position
+            field_name_arr.append(
+                field_name if field_name else str(len(field_name_arr))
+            )
+
+    # walk the filled-in string, matching each literal segment and capturing
+    # the text between it and the next segment as that field's value
+    pattern_values = {}
+    last_end = 0
+    for i, literal_text in enumerate(literal_text_arr):
+        # a trailing literal (or a field-free template) yields more literal
+        # segments than fields, so there is nothing left to capture
+        if i >= len(field_name_arr):
+            break
+        start = value_str.find(literal_text, last_end)
+        if i == len(literal_text_arr) - 1:
+            end = len(value_str)
+        else:
+            end = value_str.find(literal_text_arr[i + 1], start + 1)
+        if start == -1 or end == -1:
+            break
+        start += len(literal_text)
+        pattern_values[field_name_arr[i]] = value_str[start:end]
+        last_end = end
+    return pattern_values
+
+
+def last_content_without_template(data: Dict[str, Any], **params: Dict[str, Any]) -> Any:
+    """get the template parameter values of the last message's content, ignoring the template text itself.
+
+    When caching for an agent or a chain, most of the content is boilerplate text from the template,
+    while the essential information is just the list of parameter values filled into it.
+    The cache key can therefore be a string composed of all the parameter values in that list.
+
+    WARNING: Two parameters cannot appear in the template without a literal separator between them,
+    for example: template = "{foo}{hoo}" is not supported,
+    but template = "{foo}:{hoo}" is supported
+
+    :param data: the user llm request data
+    :type data: Dict[str, Any]
+
+    :Example with str template:
+        .. code-block:: python
+
+            from gptcache import Config
+            from gptcache.processor.pre import last_content_without_template
+
+            template_obj = "tell me a joke about {subject}"
+            prompt = template_obj.format(subject="animal")
+            value = last_content_without_template(
+                data={"messages": [{"content": prompt}]}, cache_config=Config(template=template_obj)
+            )
+            print(value)
+            # ['animal']
+
+    :Example with langchain template:
+        .. code-block:: python
+
+            from langchain import PromptTemplate
+
+            from gptcache import Config
+            from gptcache.processor.pre import last_content_without_template
+
+            template_obj = PromptTemplate.from_template("tell me a joke about {subject}")
+            prompt = template_obj.format(subject="animal")
+
+            value = last_content_without_template(
+                data={"messages": [{"content": prompt}]},
+                cache_config=Config(template=template_obj.template),
+            )
+            print(value)
+            # ['animal']
+
+    NOTE: At present, only langchain's simple PromptTemplate is supported.
+    ChatPromptTemplate has to be adjusted according to its template array,
+    so to use it you must pass the final dialog template in yourself.
+    This cannot be done automatically, because ChatPromptTemplate
+    does not provide a method that returns the template string directly.
+    """
+    last_content_str = data.get("messages")[-1]["content"]
+    cache_config = params.get("cache_config", None)
+    if not (cache_config and cache_config.template):
+        return last_content_str
+
+    pattern_value = _get_pattern_value(cache_config.template, last_content_str)
+    return str(list(pattern_value.values()))
+
+
 def all_content(data: Dict[str, Any], **_: Dict[str, Any]) -> Any:
-    """ get all content of the message list
+    """get all content of the message list
 
     :param data: the user llm request data
     :type data: Dict[str, Any]
 
-    Example:
+    :Example:
         .. code-block:: python
 
             from gptcache.processor.pre import all_content
 
-            content = all_content({"messages": [{"content": "foo1"}, {"content": "foo2"}]})
-            # content = "foo1\nfoo2"
+            content = all_content(
+                {"messages": [{"content": "foo1"}, {"content": "foo2"}]}
+            )
+            # content = "foo1\\nfoo2"
     """
     s = ""
     messages = data.get("messages")
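For a quick sense of what the new helper extracts, here is a sketch (assuming this commit is installed so that the private helper is importable from gptcache.processor.pre; the template and value strings are illustrative, not from the source):

    from gptcache.processor.pre import _get_pattern_value

    # a named field is keyed by its name
    print(_get_pattern_value("tell me a joke about {subject}",
                             "tell me a joke about animal"))
    # {'subject': 'animal'}

    # unnamed fields ("{}") are keyed by their position
    print(_get_pattern_value("{}:{}", "foo:bar"))
    # {'0': 'foo', '1': 'bar'}

    # the WARNING case: adjacent fields with no literal between them have no
    # unambiguous split point, so the values come back arbitrarily divided
    print(_get_pattern_value("{foo}{hoo}", "abcdef"))
    # {'foo': 'a', 'hoo': 'bcdef'}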
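The full pre-processor then turns those values into the cache key string, or falls back to the raw content when no template is configured. A sketch under the same assumptions (the two-field template here also exercises the trailing-literal guard in the helper above):

    from gptcache import Config
    from gptcache.processor.pre import last_content_without_template

    template = "summarize {title} in {num} words"
    data = {"messages": [{"content": template.format(title="Moby Dick", num="50")}]}

    # the cache key is the stringified list of parameter values
    print(last_content_without_template(data, cache_config=Config(template=template)))
    # ['Moby Dick', '50']

    # no template configured: the raw content is returned unchanged
    print(last_content_without_template(data, cache_config=None))
    # summarize Moby Dick in 50 words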