
Commit b21633c

Use FastAPI to improve the GPTCache server

Signed-off-by: SimFG <[email protected]>

1 parent 0e53a4c commit b21633c

File tree

7 files changed: +127 -80 lines


docs/usage.md

Lines changed: 36 additions & 13 deletions
@@ -336,18 +336,41 @@ $ docker run -p 8000:8000 -it zilliz/gptcache:latest
 GPTCache supports two ways of interaction with the server:
 
 - With command line:
-  ```shell
-  $ curl -X PUT -d "receive a hello message" "http://localhost:8000?prompt=hello"
-  $ curl -X GET "http://localhost:8000?prompt=hello"
-  "receive a hello message"
-  ```
+
+  put the data to cache
+
+  ```shell
+  curl -X 'POST' \
+    'http://localhost:8000/put' \
+    -H 'accept: application/json' \
+    -H 'Content-Type: application/json' \
+    -d '{
+      "prompt": "Hi",
+      "answer": "Hi back"
+    }'
+  ```
+
+  get the data from the cache
+
+  ```shell
+  curl -X 'POST' \
+    'http://localhost:8000/get' \
+    -H 'accept: application/json' \
+    -H 'Content-Type: application/json' \
+    -d '{
+      "prompt": "Hi"
+    }'
+  ```
+
+
 - With python client:
-  ```python
-  >>> from gptcache import Client
 
-  >>> client = Client(uri="http://localhost:8000")
-  >>> client.put("Hi", "Hi back")
-  200
-  >>> client.get("Hi")
-  'Hi back'
-  ```
+  ```python
+  >>> from gptcache.client import Client
+
+  >>> client = Client(uri="http://localhost:8000")
+  >>> client.put("Hi", "Hi back")
+  200
+  >>> client.get("Hi")
+  'Hi back'
+  ```

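The same round trip can also be driven from Python with httpx directly (the library the client wraps); a minimal sketch, assuming a GPTCache server is listening on localhost:8000:

```python
import httpx

BASE = "http://localhost:8000"  # assumes a locally running GPTCache server

with httpx.Client() as client:
    # Store an answer for a prompt through the new POST /put route.
    resp = client.post(f"{BASE}/put", json={"prompt": "Hi", "answer": "Hi back"})
    print(resp.status_code)  # 200 on success

    # Fetch it back through POST /get; the request body mirrors CacheData below.
    resp = client.post(f"{BASE}/get", json={"prompt": "Hi"})
    print(resp.json()["answer"])  # 'Hi back'
```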
examples/README.md

Lines changed: 36 additions & 13 deletions
@@ -627,21 +627,44 @@ Also, you can start the service in a docker container:
 GPTCache supports two ways of interaction with the server:
 
 - With command line:
-  ```shell
-  $ curl -X PUT -d "receive a hello message" "http://localhost:8000?prompt=hello"
-  $ curl -X GET "http://localhost:8000?prompt=hello"
-  "receive a hello message"
-  ```
+
+  put the data to cache
+
+  ```shell
+  curl -X 'POST' \
+    'http://localhost:8000/put' \
+    -H 'accept: application/json' \
+    -H 'Content-Type: application/json' \
+    -d '{
+      "prompt": "Hi",
+      "answer": "Hi back"
+    }'
+  ```
+
+  get the data from the cache
+
+  ```shell
+  curl -X 'POST' \
+    'http://localhost:8000/get' \
+    -H 'accept: application/json' \
+    -H 'Content-Type: application/json' \
+    -d '{
+      "prompt": "Hi"
+    }'
+  ```
+
+
 - With python client:
-  ```python
-  >>> from gptcache import Client
 
-  >>> client = Client(uri="http://localhost:8000")
-  >>> client.put("Hi", "Hi back")
-  200
-  >>> client.get("Hi")
-  'Hi back'
-  ```
+  ```python
+  >>> from gptcache.client import Client
+
+  >>> client = Client(uri="http://localhost:8000")
+  >>> client.put("Hi", "Hi back")
+  200
+  >>> client.get("Hi")
+  'Hi back'
+  ```
 
 ## [Benchmark](https://github.com/zilliztech/GPTCache/tree/main/examples/benchmark/benchmark_sqlite_faiss_onnx.py)

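The doc snippets cover /put and /get; the reworked server below also registers a POST /flush route. A hedged Python sketch of calling it, assuming the same local server:

```python
import httpx

# POST /flush takes no body; the server calls cache.flush() and replies
# with a confirmation string (JSON-encoded by FastAPI).
resp = httpx.post("http://localhost:8000/flush")
print(resp.json())  # "successfully flush the cache"
```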
gptcache/client.py

Lines changed: 19 additions & 7 deletions
@@ -1,4 +1,5 @@
 import asyncio
+import json
 
 from gptcache.utils import import_httpx
 
@@ -7,6 +8,9 @@
 import httpx  # pylint: disable=C0413
 
 
+_CLIENT_HEADER = {"Content-Type": "application/json", "Accept": "application/json"}
+
+
 class Client:
     """GPTCache client to send requests to GPTCache server.
 
@@ -28,20 +32,28 @@ def __init__(self, uri: str = "http://localhost:8000"):
 
     async def _put(self, question: str, answer: str):
         async with httpx.AsyncClient() as client:
-            headers = {"Content-Type": "application/x-www-form-urlencoded"}
-            params = {"prompt": question}
-            data = answer
+            data = {
+                "prompt": question,
+                "answer": answer,
+            }
 
-            response = await client.put(self._uri, params=params, headers=headers, data=data)
+            response = await client.post(
+                f"{self._uri}/put", headers=_CLIENT_HEADER, data=json.dumps(data)
+            )
 
             return response.status_code
 
     async def _get(self, question: str):
         async with httpx.AsyncClient() as client:
-            params = {"prompt": question}
-            response = await client.get(self._uri, params=params)
+            data = {
+                "prompt": question,
+            }
+
+            response = await client.post(
+                f"{self._uri}/get", headers=_CLIENT_HEADER, data=json.dumps(data)
+            )
 
-        return response.json()
+            return response.json().get("answer")
 
     def put(self, question: str, answer: str):
         """

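An aside on request construction: httpx can also serialize the body and set the JSON content type itself via its json= keyword, so an equivalent formulation of the new _put (a hypothetical sketch, not the commit's code) would be:

```python
import httpx


class ClientSketch:
    """Hypothetical variant of gptcache.client.Client._put using httpx's
    json= keyword, which serializes the dict and sets the JSON headers."""

    def __init__(self, uri: str = "http://localhost:8000"):
        self._uri = uri

    async def _put(self, question: str, answer: str) -> int:
        async with httpx.AsyncClient() as client:
            response = await client.post(
                f"{self._uri}/put", json={"prompt": question, "answer": answer}
            )
            return response.status_code
```

On the wire this is equivalent to the commit's explicit _CLIENT_HEADER plus json.dumps; the explicit form just shares one header dict across both endpoints.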
gptcache/utils/__init__.py

Lines changed: 6 additions & 0 deletions
@@ -37,6 +37,7 @@
     "import_paddle",
     "import_paddlenlp",
     "import_tiktoken",
+    "import_fastapi",
 ]
 
 import importlib.util
@@ -235,3 +236,8 @@ def import_paddlenlp():
 
 def import_tiktoken():
     _check_library("tiktoken")
+
+
+def import_fastapi():
+    _check_library("uvicorn", package="'uvicorn[standard]'")
+    _check_library("fastapi")

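_check_library itself is not shown in this hunk; judging from its call sites, it verifies that a module is importable, with package naming the pip target when it differs from the import name. A purely illustrative, hypothetical reconstruction:

```python
import importlib.util


# Hypothetical sketch of the helper this hunk calls; the real implementation
# lives elsewhere in gptcache/utils/__init__.py and may auto-install instead.
def _check_library(libname: str, package: str = None):
    """Verify that `libname` is importable; hint at the pip target otherwise."""
    if importlib.util.find_spec(libname) is None:
        raise ImportError(
            f"Missing dependency {libname!r}; install it with: "
            f"pip install {package or libname}"
        )
```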
gptcache_server/__init__.py

Lines changed: 0 additions & 1 deletion
@@ -1 +0,0 @@
-from gptcache_server.server import start_server

gptcache_server/server.py

Lines changed: 28 additions & 44 deletions
@@ -1,6 +1,4 @@
 import argparse
-import http.server
-import json
 
 from gptcache import cache
 from gptcache.adapter.api import (
@@ -9,59 +7,45 @@
     init_similar_cache,
     init_similar_cache_from_config,
 )
+from gptcache.utils import import_fastapi, import_pydantic
 
+import_fastapi()
+import_pydantic()
 
-class GPTCacheHandler(http.server.BaseHTTPRequestHandler):
-    """
-    HTTPServer handler for GPTCache Service.
-    """
+from fastapi import FastAPI
+import uvicorn
+from pydantic import BaseModel
 
-    # curl -X GET "http://localhost:8000?prompt=hello"
-    def do_GET(self):
-        params = self.path.split("?")[1]
-        prompt = params.split("=")[1]
 
-        result = get(prompt)
+app = FastAPI()
 
-        response = json.dumps(result)
 
-        self.send_response(200)
-        self.send_header("Content-type", "application/json")
-        self.end_headers()
-        self.wfile.write(bytes(response, "utf-8"))
+class CacheData(BaseModel):
+    prompt: str
+    answer: str = ""
 
-    # curl -X PUT -d "receive a hello message" "http://localhost:8000?prompt=hello"
-    def do_PUT(self):
-        params = self.path.split("?")[1]
-        prompt = params.split("=")[1]
-        content_length = int(self.headers.get("Content-Length", "0"))
-        data = self.rfile.read(content_length).decode("utf-8")
 
-        put(prompt, data)
+@app.get("/")
+async def hello():
+    return "hello gptcache server"
 
-        self.send_response(200)
-        self.end_headers()
-        self.wfile.write(bytes("successfully update the cache", "utf-8"))
 
-    # curl -X POST "http://localhost:8000?flush=true"
-    def do_POST(self):
-        params = self.path.split("?")[1]
-        flush = params.split("=")[1]
-        back_message = "currently only be used to flush the cache, like: example.com?flush=true"
-        if flush == "true":
-            cache.flush()
-            self.send_response(200)
-            back_message = "successfully flush the cache"
-        else:
-            self.send_response(404)
-        self.end_headers()
-        self.wfile.write(bytes(back_message, "utf-8"))
+@app.post("/put")
+async def put_cache(cache_data: CacheData) -> str:
+    put(cache_data.prompt, cache_data.answer)
+    return "successfully update the cache"
 
 
-def start_server(host: str, port: int):
-    httpd = http.server.HTTPServer((host, port), GPTCacheHandler)
-    print(f"Starting server at {host}:{port}")
-    httpd.serve_forever()
+@app.post("/get")
+async def get_cache(cache_data: CacheData) -> CacheData:
+    result = get(cache_data.prompt)
+    return CacheData(prompt=cache_data.prompt, answer=result)
+
+
+@app.post("/flush")
+async def flush_cache() -> str:
+    cache.flush()
+    return "successfully flush the cache"
 
 
 def main():
@@ -86,7 +70,7 @@ def main():
     else:
         init_similar_cache(args.cache_dir)
 
-    start_server(args.host, args.port)
+    uvicorn.run(app, host=args.host, port=args.port)
 
 
 if __name__ == "__main__":

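The new routes are easy to smoke-test without a running uvicorn process, since FastAPI ships a test client. A minimal sketch, assuming the cache has been initialized first (e.g. via init_similar_cache(), as main() does before serving):

```python
from fastapi.testclient import TestClient

from gptcache_server.server import app

# Assumes the cache was initialized before these calls, as main() does.
client = TestClient(app)

assert client.get("/").json() == "hello gptcache server"
assert client.post("/put", json={"prompt": "Hi", "answer": "Hi back"}).status_code == 200
assert client.post("/get", json={"prompt": "Hi"}).json()["answer"] == "Hi back"
assert client.post("/flush").json() == "successfully flush the cache"
```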
tests/unit_tests/test_client.py

Lines changed: 2 additions & 2 deletions
@@ -9,10 +9,10 @@ def test_client():
         mock_response.return_value = Mock(status_code=200)
         status_code = client.put("Hi", "Hi back")
         assert status_code == 200
-
+
     with patch("httpx.AsyncClient.post") as mock_response:
         m = Mock()
-        attrs = {"json.return_value": "Hi back"}
+        attrs = {"json.return_value": {"answer": "Hi back"}}
         m.configure_mock(**attrs)
         mock_response.return_value = m
         ans = client.get("Hi")

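One detail worth noting in this test: since Python 3.8, patch() substitutes an AsyncMock when the patched attribute is an async function, which is why awaiting the patched AsyncClient.post resolves to return_value at all. A minimal illustration (the URL is a placeholder and is never contacted):

```python
import asyncio
from unittest.mock import patch

import httpx


async def call():
    async with httpx.AsyncClient() as client:
        # The patched post is an AsyncMock, so awaiting it yields return_value.
        return await client.post("http://example.invalid/put")


with patch("httpx.AsyncClient.post") as mock_post:
    mock_post.return_value = "stubbed"
    assert asyncio.run(call()) == "stubbed"
```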