Skip to content

Commit ee5c335

Browse files
committed
feat(chatterbox): support multilingual, MPS, and CPU
Signed-off-by: Ettore Di Giacinto <[email protected]>
1 parent e4f8b63 commit ee5c335

File tree

6 files changed

+91
-10
lines changed

6 files changed

+91
-10
lines changed

.github/workflows/backend.yml

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -111,6 +111,18 @@ jobs:
111111
backend: "diffusers"
112112
dockerfile: "./backend/Dockerfile.python"
113113
context: "./backend"
114+
- build-type: ''
115+
cuda-major-version: ""
116+
cuda-minor-version: ""
117+
platforms: 'linux/amd64'
118+
tag-latest: 'auto'
119+
tag-suffix: '-cpu-chatterbox'
120+
runs-on: 'ubuntu-latest'
121+
base-image: "ubuntu:22.04"
122+
skip-drivers: 'true'
123+
backend: "chatterbox"
124+
dockerfile: "./backend/Dockerfile.python"
125+
context: "./backend"
114126
# CUDA 11 additional backends
115127
- build-type: 'cublas'
116128
cuda-major-version: "11"
@@ -968,6 +980,9 @@ jobs:
968980
- backend: "mlx"
969981
tag-suffix: "-metal-darwin-arm64-mlx"
970982
build-type: "mps"
983+
- backend: "chatterbox"
984+
tag-suffix: "-metal-darwin-arm64-chatterbox"
985+
build-type: "mps"
971986
- backend: "mlx-vlm"
972987
tag-suffix: "-metal-darwin-arm64-mlx-vlm"
973988
build-type: "mps"

Makefile

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -369,6 +369,9 @@ backends/kitten-tts: docker-build-kitten-tts docker-save-kitten-tts build
369369
backends/kokoro: docker-build-kokoro docker-save-kokoro build
370370
./local-ai backends install "ocifile://$(abspath ./backend-images/kokoro.tar)"
371371

372+
backends/chatterbox: docker-build-chatterbox docker-save-chatterbox build
373+
./local-ai backends install "ocifile://$(abspath ./backend-images/chatterbox.tar)"
374+
372375
backends/llama-cpp-darwin: build
373376
bash ./scripts/build/llama-cpp-darwin.sh
374377
./local-ai backends install "ocifile://$(abspath ./backend-images/llama-cpp.tar)"
@@ -493,7 +496,7 @@ docker-build-bark:
493496
docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) -t local-ai-backend:bark -f backend/Dockerfile.python --build-arg BACKEND=bark .
494497

495498
docker-build-chatterbox:
496-
docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) -t local-ai-backend:chatterbox -f backend/Dockerfile.python --build-arg BACKEND=chatterbox .
499+
docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) -t local-ai-backend:chatterbox -f backend/Dockerfile.python --build-arg BACKEND=chatterbox ./backend
497500

498501
docker-build-exllama2:
499502
docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) -t local-ai-backend:exllama2 -f backend/Dockerfile.python --build-arg BACKEND=exllama2 .

backend/index.yaml

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -350,6 +350,8 @@
350350
alias: "chatterbox"
351351
capabilities:
352352
nvidia: "cuda12-chatterbox"
353+
metal: "metal-chatterbox"
354+
default: "cpu-chatterbox"
353355
- &piper
354356
name: "piper"
355357
uri: "quay.io/go-skynet/local-ai-backends:latest-piper"
@@ -1223,6 +1225,28 @@
12231225
name: "chatterbox-development"
12241226
capabilities:
12251227
nvidia: "cuda12-chatterbox-development"
1228+
metal: "metal-chatterbox-development"
1229+
default: "cpu-chatterbox-development"
1230+
- !!merge <<: *chatterbox
1231+
name: "cpu-chatterbox"
1232+
uri: "quay.io/go-skynet/local-ai-backends:latest-cpu-chatterbox"
1233+
mirrors:
1234+
- localai/localai-backends:latest-cpu-chatterbox
1235+
- !!merge <<: *chatterbox
1236+
name: "cpu-chatterbox-development"
1237+
uri: "quay.io/go-skynet/local-ai-backends:master-cpu-chatterbox"
1238+
mirrors:
1239+
- localai/localai-backends:master-cpu-chatterbox
1240+
- !!merge <<: *chatterbox
1241+
name: "metal-chatterbox"
1242+
uri: "quay.io/go-skynet/local-ai-backends:latest-metal-darwin-arm64-chatterbox"
1243+
mirrors:
1244+
- localai/localai-backends:latest-metal-darwin-arm64-chatterbox
1245+
- !!merge <<: *chatterbox
1246+
name: "metal-chatterbox-development"
1247+
uri: "quay.io/go-skynet/local-ai-backends:master-metal-darwin-arm64-chatterbox"
1248+
mirrors:
1249+
- localai/localai-backends:master-metal-darwin-arm64-chatterbox
12261250
- !!merge <<: *chatterbox
12271251
name: "cuda12-chatterbox-development"
12281252
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-12-chatterbox"

backend/python/chatterbox/backend.py

Lines changed: 45 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -14,9 +14,15 @@
1414
import torch
1515
import torchaudio as ta
1616
from chatterbox.tts import ChatterboxTTS
17-
17+
from chatterbox.mtl_tts import ChatterboxMultilingualTTS
1818
import grpc
1919

20+
def is_float(s):
    """Return True if ``s`` can be converted by ``float()``, False otherwise.

    Anything ``float()`` accepts counts as a float here, including integer
    strings such as ``"3"`` and the special values ``"nan"`` and ``"inf"``.

    Args:
        s: The value to probe, normally an option value string.

    Returns:
        bool: True when ``float(s)`` succeeds, False when it fails.
    """
    try:
        float(s)
    except (TypeError, ValueError):
        # ValueError: a string that does not spell a number.
        # TypeError: an object float() cannot handle at all (e.g. None);
        # a predicate should answer False for it instead of raising.
        return False
    return True
2026

2127
_ONE_DAY_IN_SECONDS = 60 * 60 * 24
2228

@@ -47,6 +53,27 @@ def LoadModel(self, request, context):
4753
if not torch.cuda.is_available() and request.CUDA:
4854
return backend_pb2.Result(success=False, message="CUDA is not available")
4955

56+
57+
options = request.Options
58+
59+
# empty dict
60+
self.options = {}
61+
62+
# The options are a list of strings in this form optname:optvalue
63+
# We are storing all the options in a dict so we can use it later when
64+
# generating the audio
65+
for opt in options:
66+
if ":" not in opt:
67+
continue
68+
key, value = opt.split(":")
69+
# if value is a number, convert it to the appropriate type
70+
if is_float(value):
71+
if value.is_integer():
72+
value = int(value)
73+
else:
74+
value = float(value)
75+
self.options[key] = value
76+
5077
self.AudioPath = None
5178

5279
if os.path.isabs(request.AudioPath):
@@ -56,10 +83,14 @@ def LoadModel(self, request, context):
5683
modelFileBase = os.path.dirname(request.ModelFile)
5784
# modify AudioPath to be relative to modelFileBase
5885
self.AudioPath = os.path.join(modelFileBase, request.AudioPath)
59-
6086
try:
6187
print("Preparing models, please wait", file=sys.stderr)
62-
self.model = ChatterboxTTS.from_pretrained(device=device)
88+
if "multilingual" in self.options:
89+
# remove key from options
90+
del self.options["multilingual"]
91+
self.model = ChatterboxMultilingualTTS.from_pretrained(device=device)
92+
else:
93+
self.model = ChatterboxTTS.from_pretrained(device=device)
6394
except Exception as err:
6495
return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}")
6596
# Implement your logic here for the LoadModel service
@@ -68,12 +99,18 @@ def LoadModel(self, request, context):
6899

69100
def TTS(self, request, context):
70101
try:
71-
# Generate audio using ChatterboxTTS
102+
kwargs = {}
103+
104+
if "language" in self.options:
105+
kwargs["language_id"] = self.options["language"]
72106
if self.AudioPath is not None:
73-
wav = self.model.generate(request.text, audio_prompt_path=self.AudioPath)
74-
else:
75-
wav = self.model.generate(request.text)
76-
107+
kwargs["audio_prompt_path"] = self.AudioPath
108+
109+
# add options to kwargs
110+
kwargs.update(self.options)
111+
112+
# Generate audio using ChatterboxTTS
113+
wav = self.model.generate(request.text, **kwargs)
77114
# Save the generated audio
78115
ta.save(request.dst, wav, self.model.sr)
79116

backend/python/chatterbox/requirements-cpu.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
--extra-index-url https://download.pytorch.org/whl/cpu
12
accelerate
23
torch==2.6.0
34
torchaudio==2.6.0

backend/python/chatterbox/requirements.txt

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,4 +2,5 @@ grpcio==1.71.0
22
protobuf
33
certifi
44
packaging
5-
setuptools
5+
setuptools
6+
numpy>=1.24.0,<1.26.0

0 commit comments

Comments
 (0)