
Commit b21633c

Use FastAPI to improve the GPTCache server

Signed-off-by: SimFG <[email protected]>

1 parent 0e53a4c commit b21633c

File tree

7 files changed: +127 -80 lines


docs/usage.md

Lines changed: 36 additions & 13 deletions
@@ -336,18 +336,41 @@ $ docker run -p 8000:8000 -it zilliz/gptcache:latest
 GPTCache supports two ways of interaction with the server:
 
 - With command line:
-  ```shell
-  $ curl -X PUT -d "receive a hello message" "http://localhost:8000?prompt=hello"
-  $ curl -X GET "http://localhost:8000?prompt=hello"
-  "receive a hello message"
-  ```
+
+  put the data to cache
+
+  ```shell
+  curl -X 'POST' \
+    'http://localhost:8000/put' \
+    -H 'accept: application/json' \
+    -H 'Content-Type: application/json' \
+    -d '{
+      "prompt": "Hi",
+      "answer": "Hi back"
+    }'
+  ```
+
+  get the data from the cache
+
+  ```shell
+  curl -X 'POST' \
+    'http://localhost:8000/get' \
+    -H 'accept: application/json' \
+    -H 'Content-Type: application/json' \
+    -d '{
+      "prompt": "Hi"
+    }'
+  ```
+
+
 - With python client:
-  ```python
-  >>> from gptcache import Client
 
-  >>> client = Client(uri="http://localhost:8000")
-  >>> client.put("Hi", "Hi back")
-  200
-  >>> client.get("Hi")
-  'Hi back'
-  ```
+  ```python
+  >>> from gptcache.client import Client
+
+  >>> client = Client(uri="http://localhost:8000")
+  >>> client.put("Hi", "Hi back")
+  200
+  >>> client.get("Hi")
+  'Hi back'
+  ```

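The same round trip can also be driven from Python with httpx directly (the library the client wraps); a minimal sketch, assuming a GPTCache server is listening on localhost:8000:

```python
import httpx

BASE = "http://localhost:8000"  # assumes a locally running GPTCache server

with httpx.Client() as client:
    # Store an answer for a prompt through the new POST /put route.
    resp = client.post(f"{BASE}/put", json={"prompt": "Hi", "answer": "Hi back"})
    print(resp.status_code)  # 200 on success

    # Fetch it back through POST /get; the request body mirrors CacheData below.
    resp = client.post(f"{BASE}/get", json={"prompt": "Hi"})
    print(resp.json()["answer"])  # 'Hi back'
```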
examples/README.md

Lines changed: 36 additions & 13 deletions
@@ -627,21 +627,44 @@ Also, you can start the service in a docker container:
 GPTCache supports two ways of interaction with the server:
 
 - With command line:
-  ```shell
-  $ curl -X PUT -d "receive a hello message" "http://localhost:8000?prompt=hello"
-  $ curl -X GET "http://localhost:8000?prompt=hello"
-  "receive a hello message"
-  ```
+
+  put the data to cache
+
+  ```shell
+  curl -X 'POST' \
+    'http://localhost:8000/put' \
+    -H 'accept: application/json' \
+    -H 'Content-Type: application/json' \
+    -d '{
+      "prompt": "Hi",
+      "answer": "Hi back"
+    }'
+  ```
+
+  get the data from the cache
+
+  ```shell
+  curl -X 'POST' \
+    'http://localhost:8000/get' \
+    -H 'accept: application/json' \
+    -H 'Content-Type: application/json' \
+    -d '{
+      "prompt": "Hi"
+    }'
+  ```
+
+
 - With python client:
-  ```python
-  >>> from gptcache import Client
 
-  >>> client = Client(uri="http://localhost:8000")
-  >>> client.put("Hi", "Hi back")
-  200
-  >>> client.get("Hi")
-  'Hi back'
-  ```
+  ```python
+  >>> from gptcache.client import Client
+
+  >>> client = Client(uri="http://localhost:8000")
+  >>> client.put("Hi", "Hi back")
+  200
+  >>> client.get("Hi")
+  'Hi back'
+  ```
 
 ## [Benchmark](https://github.com/zilliztech/GPTCache/tree/main/examples/benchmark/benchmark_sqlite_faiss_onnx.py)

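The doc snippets cover /put and /get; the reworked server below also registers a POST /flush route. A hedged Python sketch of calling it, assuming the same local server:

```python
import httpx

# POST /flush takes no body; the server calls cache.flush() and replies
# with a confirmation string (JSON-encoded by FastAPI).
resp = httpx.post("http://localhost:8000/flush")
print(resp.json())  # "successfully flush the cache"
```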
gptcache/client.py

Lines changed: 19 additions & 7 deletions
@@ -1,4 +1,5 @@
 import asyncio
+import json
 
 from gptcache.utils import import_httpx
 
@@ -7,6 +8,9 @@
 import httpx  # pylint: disable=C0413
 
 
+_CLIENT_HEADER = {"Content-Type": "application/json", "Accept": "application/json"}
+
+
 class Client:
     """GPTCache client to send requests to GPTCache server.
 
@@ -28,20 +32,28 @@ def __init__(self, uri: str = "http://localhost:8000"):
 
     async def _put(self, question: str, answer: str):
         async with httpx.AsyncClient() as client:
-            headers = {"Content-Type": "application/x-www-form-urlencoded"}
-            params = {"prompt": question}
-            data = answer
+            data = {
+                "prompt": question,
+                "answer": answer,
+            }
 
-            response = await client.put(self._uri, params=params, headers=headers, data=data)
+            response = await client.post(
+                f"{self._uri}/put", headers=_CLIENT_HEADER, data=json.dumps(data)
+            )
 
             return response.status_code
 
     async def _get(self, question: str):
         async with httpx.AsyncClient() as client:
-            params = {"prompt": question}
-            response = await client.get(self._uri, params=params)
+            data = {
+                "prompt": question,
+            }
+
+            response = await client.post(
+                f"{self._uri}/get", headers=_CLIENT_HEADER, data=json.dumps(data)
+            )
 
-        return response.json()
+            return response.json().get("answer")
 
     def put(self, question: str, answer: str):
         """

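An aside on request construction: httpx can also serialize the body and set the JSON content type itself via its json= keyword, so an equivalent formulation of the new _put (a hypothetical sketch, not the commit's code) would be:

```python
import httpx


class ClientSketch:
    """Hypothetical variant of gptcache.client.Client._put using httpx's
    json= keyword, which serializes the dict and sets the JSON headers."""

    def __init__(self, uri: str = "http://localhost:8000"):
        self._uri = uri

    async def _put(self, question: str, answer: str) -> int:
        async with httpx.AsyncClient() as client:
            response = await client.post(
                f"{self._uri}/put", json={"prompt": question, "answer": answer}
            )
            return response.status_code
```

On the wire this is equivalent to the commit's explicit _CLIENT_HEADER plus json.dumps; the explicit form just shares one header dict across both endpoints.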
gptcache/utils/__init__.py

Lines changed: 6 additions & 0 deletions
@@ -37,6 +37,7 @@
     "import_paddle",
     "import_paddlenlp",
     "import_tiktoken",
+    "import_fastapi",
 ]
 
 import importlib.util
@@ -235,3 +236,8 @@ def import_paddlenlp():
 
 def import_tiktoken():
     _check_library("tiktoken")
+
+
+def import_fastapi():
+    _check_library("uvicorn", package="'uvicorn[standard]'")
+    _check_library("fastapi")

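_check_library itself is not shown in this hunk; judging from its call sites, it verifies that a module is importable, with package naming the pip target when it differs from the import name. A purely illustrative, hypothetical reconstruction:

```python
import importlib.util


# Hypothetical sketch of the helper this hunk calls; the real implementation
# lives elsewhere in gptcache/utils/__init__.py and may auto-install instead.
def _check_library(libname: str, package: str = None):
    """Verify that `libname` is importable; hint at the pip target otherwise."""
    if importlib.util.find_spec(libname) is None:
        raise ImportError(
            f"Missing dependency {libname!r}; install it with: "
            f"pip install {package or libname}"
        )
```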
gptcache_server/__init__.py

Lines changed: 0 additions & 1 deletion
@@ -1 +0,0 @@
-from gptcache_server.server import start_server

gptcache_server/server.py

Lines changed: 28 additions & 44 deletions
@@ -1,6 +1,4 @@
 import argparse
-import http.server
-import json
 
 from gptcache import cache
 from gptcache.adapter.api import (
@@ -9,59 +7,45 @@
     init_similar_cache,
     init_similar_cache_from_config,
 )
+from gptcache.utils import import_fastapi, import_pydantic
 
+import_fastapi()
+import_pydantic()
 
-class GPTCacheHandler(http.server.BaseHTTPRequestHandler):
-    """
-    HTTPServer handler for GPTCache Service.
-    """
+from fastapi import FastAPI
+import uvicorn
+from pydantic import BaseModel
 
-    # curl -X GET "http://localhost:8000?prompt=hello"
-    def do_GET(self):
-        params = self.path.split("?")[1]
-        prompt = params.split("=")[1]
 
-        result = get(prompt)
+app = FastAPI()
 
-        response = json.dumps(result)
 
-        self.send_response(200)
-        self.send_header("Content-type", "application/json")
-        self.end_headers()
-        self.wfile.write(bytes(response, "utf-8"))
+class CacheData(BaseModel):
+    prompt: str
+    answer: str = ""
 
-    # curl -X PUT -d "receive a hello message" "http://localhost:8000?prompt=hello"
-    def do_PUT(self):
-        params = self.path.split("?")[1]
-        prompt = params.split("=")[1]
-        content_length = int(self.headers.get("Content-Length", "0"))
-        data = self.rfile.read(content_length).decode("utf-8")
 
-        put(prompt, data)
+@app.get("/")
+async def hello():
+    return "hello gptcache server"
 
-        self.send_response(200)
-        self.end_headers()
-        self.wfile.write(bytes("successfully update the cache", "utf-8"))
 
-    # curl -X POST "http://localhost:8000?flush=true"
-    def do_POST(self):
-        params = self.path.split("?")[1]
-        flush = params.split("=")[1]
-        back_message = "currently only be used to flush the cache, like: example.com?flush=true"
-        if flush == "true":
-            cache.flush()
-            self.send_response(200)
-            back_message = "successfully flush the cache"
-        else:
-            self.send_response(404)
-        self.end_headers()
-        self.wfile.write(bytes(back_message, "utf-8"))
+@app.post("/put")
+async def put_cache(cache_data: CacheData) -> str:
+    put(cache_data.prompt, cache_data.answer)
+    return "successfully update the cache"
 
 
-def start_server(host: str, port: int):
-    httpd = http.server.HTTPServer((host, port), GPTCacheHandler)
-    print(f"Starting server at {host}:{port}")
-    httpd.serve_forever()
+@app.post("/get")
+async def get_cache(cache_data: CacheData) -> CacheData:
+    result = get(cache_data.prompt)
+    return CacheData(prompt=cache_data.prompt, answer=result)
+
+
+@app.post("/flush")
+async def flush_cache() -> str:
+    cache.flush()
+    return "successfully flush the cache"
 
 
 def main():
@@ -86,7 +70,7 @@ def main():
     else:
         init_similar_cache(args.cache_dir)
 
-    start_server(args.host, args.port)
+    uvicorn.run(app, host=args.host, port=args.port)
 
 
 if __name__ == "__main__":

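The new routes are easy to smoke-test without a running uvicorn process, since FastAPI ships a test client. A minimal sketch, assuming the cache has been initialized first (e.g. via init_similar_cache(), as main() does before serving):

```python
from fastapi.testclient import TestClient

from gptcache_server.server import app

# Assumes the cache was initialized before these calls, as main() does.
client = TestClient(app)

assert client.get("/").json() == "hello gptcache server"
assert client.post("/put", json={"prompt": "Hi", "answer": "Hi back"}).status_code == 200
assert client.post("/get", json={"prompt": "Hi"}).json()["answer"] == "Hi back"
assert client.post("/flush").json() == "successfully flush the cache"
```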
tests/unit_tests/test_client.py

Lines changed: 2 additions & 2 deletions
@@ -9,10 +9,10 @@ def test_client():
         mock_response.return_value = Mock(status_code=200)
         status_code = client.put("Hi", "Hi back")
         assert status_code == 200
-
+
     with patch("httpx.AsyncClient.post") as mock_response:
         m = Mock()
-        attrs = {"json.return_value": "Hi back"}
+        attrs = {"json.return_value": {"answer": "Hi back"}}
         m.configure_mock(**attrs)
         mock_response.return_value = m
         ans = client.get("Hi")

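One detail worth noting in this test: since Python 3.8, patch() substitutes an AsyncMock when the patched attribute is an async function, which is why awaiting the patched AsyncClient.post resolves to return_value at all. A minimal illustration (the URL is a placeholder and is never contacted):

```python
import asyncio
from unittest.mock import patch

import httpx


async def call():
    async with httpx.AsyncClient() as client:
        # The patched post is an AsyncMock, so awaiting it yields return_value.
        return await client.post("http://example.invalid/put")


with patch("httpx.AsyncClient.post") as mock_post:
    mock_post.return_value = "stubbed"
    assert asyncio.run(call()) == "stubbed"
```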