diff --git a/.github/workflows/backend.yml b/.github/workflows/backend.yml index 641adfb5b4b4..d4ac24c8fab5 100644 --- a/.github/workflows/backend.yml +++ b/.github/workflows/backend.yml @@ -89,7 +89,7 @@ jobs: context: "./backend" - build-type: 'l4t' cuda-major-version: "12" - cuda-minor-version: "0" + cuda-minor-version: "8" platforms: 'linux/arm64' tag-latest: 'auto' tag-suffix: '-gpu-nvidia-l4t-diffusers' @@ -175,7 +175,7 @@ jobs: # CUDA 12 builds - build-type: 'cublas' cuda-major-version: "12" - cuda-minor-version: "0" + cuda-minor-version: "8" platforms: 'linux/amd64' tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-12-rerankers' @@ -187,7 +187,7 @@ jobs: context: "./backend" - build-type: 'cublas' cuda-major-version: "12" - cuda-minor-version: "0" + cuda-minor-version: "8" platforms: 'linux/amd64' tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-12-llama-cpp' @@ -199,7 +199,7 @@ jobs: context: "./" - build-type: 'cublas' cuda-major-version: "12" - cuda-minor-version: "0" + cuda-minor-version: "8" platforms: 'linux/amd64' tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-12-vllm' @@ -211,7 +211,7 @@ jobs: context: "./backend" - build-type: 'cublas' cuda-major-version: "12" - cuda-minor-version: "0" + cuda-minor-version: "8" platforms: 'linux/amd64' tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-12-transformers' @@ -223,7 +223,7 @@ jobs: context: "./backend" - build-type: 'cublas' cuda-major-version: "12" - cuda-minor-version: "0" + cuda-minor-version: "8" platforms: 'linux/amd64' tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-12-diffusers' @@ -236,7 +236,7 @@ jobs: # CUDA 12 additional backends - build-type: 'cublas' cuda-major-version: "12" - cuda-minor-version: "0" + cuda-minor-version: "8" platforms: 'linux/amd64' tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-12-kokoro' @@ -248,7 +248,7 @@ jobs: context: "./backend" - build-type: 'cublas' cuda-major-version: "12" - cuda-minor-version: "0" + cuda-minor-version: "8" platforms: 'linux/amd64' tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-12-faster-whisper' @@ -260,7 +260,7 @@ jobs: context: "./backend" - build-type: 'cublas' cuda-major-version: "12" - cuda-minor-version: "0" + cuda-minor-version: "8" platforms: 'linux/amd64' tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-12-coqui' @@ -272,7 +272,7 @@ jobs: context: "./backend" - build-type: 'cublas' cuda-major-version: "12" - cuda-minor-version: "0" + cuda-minor-version: "8" platforms: 'linux/amd64' tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-12-bark' @@ -284,7 +284,7 @@ jobs: context: "./backend" - build-type: 'cublas' cuda-major-version: "12" - cuda-minor-version: "0" + cuda-minor-version: "8" platforms: 'linux/amd64' tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-12-chatterbox' @@ -566,7 +566,7 @@ jobs: context: "./" - build-type: 'cublas' cuda-major-version: "12" - cuda-minor-version: "0" + cuda-minor-version: "8" platforms: 'linux/arm64' skip-drivers: 'true' tag-latest: 'auto' @@ -603,7 +603,7 @@ jobs: context: "./" - build-type: 'cublas' cuda-major-version: "12" - cuda-minor-version: "0" + cuda-minor-version: "8" platforms: 'linux/amd64' tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-12-stablediffusion-ggml' @@ -663,7 +663,7 @@ jobs: context: "./" - build-type: 'cublas' cuda-major-version: "12" - cuda-minor-version: "0" + cuda-minor-version: "8" platforms: 'linux/arm64' skip-drivers: 'true' tag-latest: 'auto' @@ -688,7 +688,7 @@ jobs: context: "./" - build-type: 'cublas' cuda-major-version: "12" - cuda-minor-version: "0" + cuda-minor-version: "8" platforms: 'linux/amd64' tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-12-whisper' @@ -748,7 +748,7 @@ jobs: context: "./" - build-type: 'cublas' cuda-major-version: "12" - cuda-minor-version: "0" + cuda-minor-version: "8" platforms: 'linux/arm64' skip-drivers: 'true' tag-latest: 'auto' @@ -824,7 +824,7 @@ jobs: context: "./backend" - build-type: 'cublas' cuda-major-version: "12" - cuda-minor-version: "0" + cuda-minor-version: "8" platforms: 'linux/amd64' tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-12-rfdetr' @@ -860,7 +860,7 @@ jobs: context: "./backend" - build-type: 'cublas' cuda-major-version: "12" - cuda-minor-version: "0" + cuda-minor-version: "8" platforms: 'linux/arm64' skip-drivers: 'true' tag-latest: 'auto' @@ -885,7 +885,7 @@ jobs: context: "./backend" - build-type: 'cublas' cuda-major-version: "12" - cuda-minor-version: "0" + cuda-minor-version: "8" platforms: 'linux/amd64' tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-12-exllama2' diff --git a/.github/workflows/image-pr.yml b/.github/workflows/image-pr.yml index a2410b22827e..6bcb995ba59a 100644 --- a/.github/workflows/image-pr.yml +++ b/.github/workflows/image-pr.yml @@ -36,7 +36,7 @@ jobs: include: - build-type: 'cublas' cuda-major-version: "12" - cuda-minor-version: "0" + cuda-minor-version: "8" platforms: 'linux/amd64' tag-latest: 'false' tag-suffix: '-gpu-nvidia-cuda-12' diff --git a/.github/workflows/image.yml b/.github/workflows/image.yml index 3864930d03ed..5b12ba07033f 100644 --- a/.github/workflows/image.yml +++ b/.github/workflows/image.yml @@ -91,7 +91,7 @@ jobs: aio: "-aio-gpu-nvidia-cuda-11" - build-type: 'cublas' cuda-major-version: "12" - cuda-minor-version: "0" + cuda-minor-version: "8" platforms: 'linux/amd64' tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-12' @@ -144,7 +144,7 @@ jobs: include: - build-type: 'cublas' cuda-major-version: "12" - cuda-minor-version: "0" + cuda-minor-version: "8" platforms: 'linux/arm64' tag-latest: 'auto' tag-suffix: '-nvidia-l4t-arm64' diff --git a/Dockerfile b/Dockerfile index 1e356014dec3..9bf6964c3f60 100644 --- a/Dockerfile +++ b/Dockerfile @@ -18,7 +18,7 @@ FROM requirements AS requirements-drivers ARG BUILD_TYPE ARG CUDA_MAJOR_VERSION=12 -ARG CUDA_MINOR_VERSION=0 +ARG CUDA_MINOR_VERSION=8 ARG SKIP_DRIVERS=false ARG TARGETARCH ARG TARGETVARIANT diff --git a/Makefile b/Makefile index 46b3bcdec4ee..8e5edd26592f 100644 --- a/Makefile +++ b/Makefile @@ -170,7 +170,7 @@ prepare-e2e: mkdir -p $(TEST_DIR) cp -rfv $(abspath ./tests/e2e-fixtures)/gpu.yaml $(TEST_DIR)/gpu.yaml test -e $(TEST_DIR)/ggllm-test-model.bin || wget -q https://huggingface.co/TheBloke/CodeLlama-7B-Instruct-GGUF/resolve/main/codellama-7b-instruct.Q2_K.gguf -O $(TEST_DIR)/ggllm-test-model.bin - docker build --build-arg IMAGE_TYPE=core --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg CUDA_MAJOR_VERSION=12 --build-arg CUDA_MINOR_VERSION=0 -t localai-tests . + docker build --build-arg IMAGE_TYPE=core --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg CUDA_MAJOR_VERSION=12 --build-arg CUDA_MINOR_VERSION=8 -t localai-tests . run-e2e-image: ls -liah $(abspath ./tests/e2e-fixtures) diff --git a/backend/README.md b/backend/README.md index 87fd9f28f89c..8d68d613ec4d 100644 --- a/backend/README.md +++ b/backend/README.md @@ -111,7 +111,7 @@ docker build -f backend/Dockerfile.python \ --build-arg BACKEND=transformers \ --build-arg BUILD_TYPE=cublas12 \ --build-arg CUDA_MAJOR_VERSION=12 \ - --build-arg CUDA_MINOR_VERSION=0 \ + --build-arg CUDA_MINOR_VERSION=8 \ -t localai-backend-transformers . # Build Go backend