
Commit 1d830ce
feat(mlx): add mlx backend (#6049)
Authored by Ettore Di Giacinto <[email protected]>
1 parent: 6dccfb0

Commit message (squashed from the PR's individual commits):

* chore: allow to install with pip
* WIP
* Make the backend build and actually work
* List models from the system only
* Add script to build darwin python backends
* Run protogen in libbackend
* Detect if mps is available across python backends
* CI: try to build backend
* Debug CI
* Fixups
* Fixups
* Index mlx-vlm
* Remove mlx-vlm
* Drop CI test

Signed-off-by: Ettore Di Giacinto <[email protected]>

File tree: 38 files changed, +926 / -211 lines

.github/workflows/backend.yml (14 additions, 1 deletion)

@@ -945,6 +945,19 @@ jobs:
           backend: "kitten-tts"
           dockerfile: "./backend/Dockerfile.python"
           context: "./backend"
+  mlx-darwin:
+    uses: ./.github/workflows/backend_build_darwin.yml
+    with:
+      backend: "mlx"
+      build-type: "mps"
+      go-version: "1.24.x"
+      tag-suffix: "-metal-darwin-arm64-mlx"
+      runs-on: "macOS-14"
+    secrets:
+      dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }}
+      dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }}
+      quayUsername: ${{ secrets.LOCALAI_REGISTRY_USERNAME }}
+      quayPassword: ${{ secrets.LOCALAI_REGISTRY_PASSWORD }}
   llama-cpp-darwin:
     runs-on: macOS-14
     strategy:
@@ -1118,4 +1131,4 @@ jobs:
       run: |
         for tag in $(echo "${{ steps.quaymeta.outputs.tags }}" | tr ',' '\n'); do
           crane push llama-cpp.tar $tag
-        done
+        done
.github/workflows/backend_build_darwin.yml (new file, 136 additions)

@@ -0,0 +1,136 @@
+---
+name: 'build darwin python backend container images (reusable)'
+
+on:
+  workflow_call:
+    inputs:
+      backend:
+        description: 'Backend to build'
+        required: true
+        type: string
+      build-type:
+        description: 'Build type (e.g., mps)'
+        default: ''
+        type: string
+      go-version:
+        description: 'Go version to use'
+        default: '1.24.x'
+        type: string
+      tag-suffix:
+        description: 'Tag suffix for the built image'
+        required: true
+        type: string
+      runs-on:
+        description: 'Runner to use'
+        default: 'macOS-14'
+        type: string
+    secrets:
+      dockerUsername:
+        required: false
+      dockerPassword:
+        required: false
+      quayUsername:
+        required: true
+      quayPassword:
+        required: true
+
+jobs:
+  darwin-backend-build:
+    runs-on: ${{ inputs.runs-on }}
+    strategy:
+      matrix:
+        go-version: ['${{ inputs.go-version }}']
+    steps:
+      - name: Clone
+        uses: actions/checkout@v5
+        with:
+          submodules: true
+
+      - name: Setup Go ${{ matrix.go-version }}
+        uses: actions/setup-go@v5
+        with:
+          go-version: ${{ matrix.go-version }}
+          cache: false
+
+      # You can test your matrix by printing the current Go version
+      - name: Display Go version
+        run: go version
+
+      - name: Dependencies
+        run: |
+          brew install protobuf grpc make protoc-gen-go protoc-gen-go-grpc libomp llvm
+
+      - name: Build ${{ inputs.backend }}-darwin
+        run: |
+          make protogen-go
+          BACKEND=${{ inputs.backend }} BUILD_TYPE=${{ inputs.build-type }} make build-darwin-python-backend
+
+      - name: Upload ${{ inputs.backend }}.tar
+        uses: actions/upload-artifact@v4
+        with:
+          name: ${{ inputs.backend }}-tar
+          path: backend-images/${{ inputs.backend }}.tar
+
+  darwin-backend-publish:
+    needs: darwin-backend-build
+    if: github.event_name != 'pull_request'
+    runs-on: ubuntu-latest
+    steps:
+      - name: Download ${{ inputs.backend }}.tar
+        uses: actions/download-artifact@v5
+        with:
+          name: ${{ inputs.backend }}-tar
+          path: .
+
+      - name: Install crane
+        run: |
+          curl -L https://github.com/google/go-containerregistry/releases/latest/download/go-containerregistry_Linux_x86_64.tar.gz | tar -xz
+          sudo mv crane /usr/local/bin/
+
+      - name: Log in to DockerHub
+        run: |
+          echo "${{ secrets.dockerPassword }}" | crane auth login docker.io -u "${{ secrets.dockerUsername }}" --password-stdin
+
+      - name: Log in to quay.io
+        run: |
+          echo "${{ secrets.quayPassword }}" | crane auth login quay.io -u "${{ secrets.quayUsername }}" --password-stdin
+
+      - name: Docker meta
+        id: meta
+        uses: docker/metadata-action@v5
+        with:
+          images: |
+            localai/localai-backends
+          tags: |
+            type=ref,event=branch
+            type=semver,pattern={{raw}}
+            type=sha
+          flavor: |
+            latest=auto
+            suffix=${{ inputs.tag-suffix }},onlatest=true
+
+      - name: Docker meta
+        id: quaymeta
+        uses: docker/metadata-action@v5
+        with:
+          images: |
+            quay.io/go-skynet/local-ai-backends
+          tags: |
+            type=ref,event=branch
+            type=semver,pattern={{raw}}
+            type=sha
+          flavor: |
+            latest=auto
+            suffix=${{ inputs.tag-suffix }},onlatest=true
+
+      - name: Push Docker image (DockerHub)
+        run: |
+          for tag in $(echo "${{ steps.meta.outputs.tags }}" | tr ',' '\n'); do
+            crane push ${{ inputs.backend }}.tar $tag
+          done
+
+      - name: Push Docker image (Quay)
+        run: |
+          for tag in $(echo "${{ steps.quaymeta.outputs.tags }}" | tr ',' '\n'); do
+            crane push ${{ inputs.backend }}.tar $tag
+          done
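
The publish job is plain crane plumbing, so the same push can be reproduced by hand when debugging. A minimal sketch, using the commands from the workflow above (the QUAY_USERNAME/QUAY_PASSWORD variable names are illustrative, and the tag shown is the one the mlx job would produce on master):

    # Authenticate against quay.io; crane reads the password from stdin
    echo "$QUAY_PASSWORD" | crane auth login quay.io -u "$QUAY_USERNAME" --password-stdin
    # Push the OCI tarball produced by the build job to a single tag
    crane push mlx.tar quay.io/go-skynet/local-ai-backends:master-metal-darwin-arm64-mlx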

Makefile (45 additions, 37 deletions)

@@ -132,43 +132,6 @@ test: test-models/testmodel.ggml protogen-go
 	$(MAKE) test-tts
 	$(MAKE) test-stablediffusion
 
-backends/diffusers: docker-build-diffusers docker-save-diffusers build
-	./local-ai backends install "ocifile://$(abspath ./backend-images/diffusers.tar)"
-
-backends/llama-cpp: docker-build-llama-cpp docker-save-llama-cpp build
-	./local-ai backends install "ocifile://$(abspath ./backend-images/llama-cpp.tar)"
-
-backends/piper: docker-build-piper docker-save-piper build
-	./local-ai backends install "ocifile://$(abspath ./backend-images/piper.tar)"
-
-backends/stablediffusion-ggml: docker-build-stablediffusion-ggml docker-save-stablediffusion-ggml build
-	./local-ai backends install "ocifile://$(abspath ./backend-images/stablediffusion-ggml.tar)"
-
-backends/whisper: docker-build-whisper docker-save-whisper build
-	./local-ai backends install "ocifile://$(abspath ./backend-images/whisper.tar)"
-
-backends/silero-vad: docker-build-silero-vad docker-save-silero-vad build
-	./local-ai backends install "ocifile://$(abspath ./backend-images/silero-vad.tar)"
-
-backends/local-store: docker-build-local-store docker-save-local-store build
-	./local-ai backends install "ocifile://$(abspath ./backend-images/local-store.tar)"
-
-backends/huggingface: docker-build-huggingface docker-save-huggingface build
-	./local-ai backends install "ocifile://$(abspath ./backend-images/huggingface.tar)"
-
-backends/rfdetr: docker-build-rfdetr docker-save-rfdetr build
-	./local-ai backends install "ocifile://$(abspath ./backend-images/rfdetr.tar)"
-
-backends/kitten-tts: docker-build-kitten-tts docker-save-kitten-tts build
-	./local-ai backends install "ocifile://$(abspath ./backend-images/kitten-tts.tar)"
-
-backends/kokoro: docker-build-kokoro docker-save-kokoro build
-	./local-ai backends install "ocifile://$(abspath ./backend-images/kokoro.tar)"
-
-backends/llama-cpp-darwin: build
-	bash ./scripts/build-llama-cpp-darwin.sh
-	./local-ai backends install "ocifile://$(abspath ./backend-images/llama-cpp.tar)"
-
 ########################################################
 ## AIO tests
 ########################################################
@@ -361,6 +324,51 @@ docker-image-intel:
 ## Backends
 ########################################################
 
+
+backends/diffusers: docker-build-diffusers docker-save-diffusers build
+	./local-ai backends install "ocifile://$(abspath ./backend-images/diffusers.tar)"
+
+backends/llama-cpp: docker-build-llama-cpp docker-save-llama-cpp build
+	./local-ai backends install "ocifile://$(abspath ./backend-images/llama-cpp.tar)"
+
+backends/piper: docker-build-piper docker-save-piper build
+	./local-ai backends install "ocifile://$(abspath ./backend-images/piper.tar)"
+
+backends/stablediffusion-ggml: docker-build-stablediffusion-ggml docker-save-stablediffusion-ggml build
+	./local-ai backends install "ocifile://$(abspath ./backend-images/stablediffusion-ggml.tar)"
+
+backends/whisper: docker-build-whisper docker-save-whisper build
+	./local-ai backends install "ocifile://$(abspath ./backend-images/whisper.tar)"
+
+backends/silero-vad: docker-build-silero-vad docker-save-silero-vad build
+	./local-ai backends install "ocifile://$(abspath ./backend-images/silero-vad.tar)"
+
+backends/local-store: docker-build-local-store docker-save-local-store build
+	./local-ai backends install "ocifile://$(abspath ./backend-images/local-store.tar)"
+
+backends/huggingface: docker-build-huggingface docker-save-huggingface build
+	./local-ai backends install "ocifile://$(abspath ./backend-images/huggingface.tar)"
+
+backends/rfdetr: docker-build-rfdetr docker-save-rfdetr build
+	./local-ai backends install "ocifile://$(abspath ./backend-images/rfdetr.tar)"
+
+backends/kitten-tts: docker-build-kitten-tts docker-save-kitten-tts build
+	./local-ai backends install "ocifile://$(abspath ./backend-images/kitten-tts.tar)"
+
+backends/kokoro: docker-build-kokoro docker-save-kokoro build
+	./local-ai backends install "ocifile://$(abspath ./backend-images/kokoro.tar)"
+
+backends/llama-cpp-darwin: build
+	bash ./scripts/build/llama-cpp-darwin.sh
+	./local-ai backends install "ocifile://$(abspath ./backend-images/llama-cpp.tar)"
+
+build-darwin-python-backend:
+	bash ./scripts/build/python-darwin.sh
+
+backends/mlx: build
+	BACKEND=mlx BUILD_TYPE=mps $(MAKE) build-darwin-python-backend
+	./local-ai backends install "ocifile://$(abspath ./backend-images/mlx.tar)"
+
 backend-images:
 	mkdir -p backend-images
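
With these targets in place, a local build of the new backend collapses to one command. A sketch grounded in the targets above (run from the repository root on an Apple-silicon Mac; "$PWD" stands in for the Makefile's $(abspath ...)):

    # Build the MLX backend image and install it into the local LocalAI instance
    make backends/mlx

    # Equivalently, the two steps the target wraps:
    BACKEND=mlx BUILD_TYPE=mps make build-darwin-python-backend
    ./local-ai backends install "ocifile://$PWD/backend-images/mlx.tar"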

backend/index.yaml (20 additions, 0 deletions)

@@ -127,6 +127,21 @@
       nvidia: "cuda12-vllm"
       amd: "rocm-vllm"
       intel: "intel-vllm"
+- &mlx
+  name: "mlx"
+  uri: "quay.io/go-skynet/local-ai-backends:latest-metal-darwin-arm64-mlx"
+  icon: https://avatars.githubusercontent.com/u/102832242?s=200&v=4
+  urls:
+    - https://github.com/ml-explore/mlx-lm
+  mirrors:
+    - localai/localai-backends:latest-metal-darwin-arm64-mlx
+  license: MIT
+  description: |
+    Run LLMs with MLX
+  tags:
+    - text-to-text
+    - LLM
+    - MLX
 - &rerankers
   name: "rerankers"
   alias: "rerankers"
@@ -371,6 +386,11 @@
     - text-to-speech
     - TTS
   license: apache-2.0
+- !!merge <<: *mlx
+  name: "mlx-development"
+  uri: "quay.io/go-skynet/local-ai-backends:master-metal-darwin-arm64-mlx"
+  mirrors:
+    - localai/localai-backends:master-metal-darwin-arm64-mlx
 - !!merge <<: *kitten-tts
   name: "kitten-tts-development"
   uri: "quay.io/go-skynet/local-ai-backends:master-kitten-tts"

backend/python/bark/Makefile (3 additions, 9 deletions)

@@ -1,29 +1,23 @@
 .PHONY: ttsbark
-ttsbark: protogen
+ttsbark:
 	bash install.sh
 
 .PHONY: run
-run: protogen
+run: ttsbark
 	@echo "Running bark..."
 	bash run.sh
 	@echo "bark run."
 
 .PHONY: test
-test: protogen
+test: ttsbark
 	@echo "Testing bark..."
 	bash test.sh
 	@echo "bark tested."
 
-.PHONY: protogen
-protogen: backend_pb2_grpc.py backend_pb2.py
-
 .PHONY: protogen-clean
 protogen-clean:
 	$(RM) backend_pb2_grpc.py backend_pb2.py
 
-backend_pb2_grpc.py backend_pb2.py:
-	python3 -m grpc_tools.protoc -I../.. -I./ --python_out=. --grpc_python_out=. backend.proto
-
 .PHONY: clean
 clean: protogen-clean
 	rm -rf venv __pycache__
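
The protoc invocation removed here (and from the other Python backend Makefiles below) has not vanished: per the commit message, protogen now runs in the shared libbackend instead of per-backend Makefiles. For reference, this is the command that moved:

    python3 -m grpc_tools.protoc -I../.. -I./ --python_out=. --grpc_python_out=. backend.proto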

backend/python/chatterbox/Makefile (4 additions, 10 deletions)

@@ -1,29 +1,23 @@
-.PHONY: coqui
-coqui: protogen
+.PHONY: chatterbox
+chatterbox:
 	bash install.sh
 
 .PHONY: run
-run: protogen
+run: chatterbox
 	@echo "Running coqui..."
 	bash run.sh
 	@echo "coqui run."
 
 .PHONY: test
-test: protogen
+test: chatterbox
 	@echo "Testing coqui..."
 	bash test.sh
 	@echo "coqui tested."
 
-.PHONY: protogen
-protogen: backend_pb2_grpc.py backend_pb2.py
-
 .PHONY: protogen-clean
 protogen-clean:
 	$(RM) backend_pb2_grpc.py backend_pb2.py
 
-backend_pb2_grpc.py backend_pb2.py:
-	python3 -m grpc_tools.protoc -I../.. -I./ --python_out=. --grpc_python_out=. backend.proto
-
 .PHONY: clean
clean: protogen-clean
 	rm -rf venv __pycache__

backend/python/chatterbox/backend.py (3 additions, 1 deletion)

@@ -41,7 +41,9 @@ def LoadModel(self, request, context):
         else:
             print("CUDA is not available", file=sys.stderr)
             device = "cpu"
-
+        mps_available = hasattr(torch.backends, "mps") and torch.backends.mps.is_available()
+        if mps_available:
+            device = "mps"
         if not torch.cuda.is_available() and request.CUDA:
            return backend_pb2.Result(success=False, message="CUDA is not available")
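
The device-selection logic the Python backends now share checks CUDA, then Apple-silicon MPS, then falls back to CPU. A standalone sketch of the same torch checks (the function name is illustrative, not LocalAI API; in the diff the MPS check simply runs last, which is equivalent in practice since CUDA and MPS never coexist on one machine):

    import torch

    def pick_device() -> str:
        # Prefer CUDA when present.
        if torch.cuda.is_available():
            return "cuda"
        # The hasattr guard keeps torch builds without an mps backend safe.
        if hasattr(torch.backends, "mps") and torch.backends.mps.is_available():
            return "mps"
        return "cpu"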
