Skip to content

Commit 464c3ff

Browse files
committed
update Dockerfile
Signed-off-by: Wang, Yi A <[email protected]>
1 parent b3989cf commit 464c3ff

File tree

4 files changed

+34
-8
lines changed

4 files changed

+34
-8
lines changed

Dockerfile_intel

Lines changed: 31 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
1-
# Rust builder
2-
FROM lukemathwalker/cargo-chef:latest-rust-1.71 AS chef
1+
FROM lukemathwalker/cargo-chef:latest-rust-1.75 AS chef
32
WORKDIR /usr/src
43

54
ARG CARGO_REGISTRIES_CRATES_IO_PROTOCOL=sparse
@@ -35,6 +34,7 @@ COPY router router
3534
COPY launcher launcher
3635
RUN cargo build --release
3736

37+
3838
# Text Generation Inference base image for Intel
3939
FROM intel/intel-extension-for-pytorch:2.1.10-xpu as base
4040

@@ -47,22 +47,49 @@ RUN wget http://nz2.archive.ubuntu.com/ubuntu/pool/main/o/openssl/libssl1.1_1.1.
4747
RUN wget -O- https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB \
4848
| gpg --dearmor | tee /usr/share/keyrings/oneapi-archive-keyring.gpg > /dev/null && echo "deb [signed-by=/usr/share/keyrings/oneapi-archive-keyring.gpg] https://apt.repos.intel.com/oneapi all main" | tee /etc/apt/sources.list.d/oneAPI.list
4949

50-
RUN apt-get update && apt install -y intel-basekit xpu-smi
50+
RUN apt-get update && apt install -y intel-basekit xpu-smi cmake python3-dev
5151

5252
# Text Generation Inference base env
5353
ENV HUGGINGFACE_HUB_CACHE=/data \
5454
HF_HUB_ENABLE_HF_TRANSFER=1 \
5555
PORT=80
5656

5757

58+
WORKDIR /usr/src
59+
# Build pytorch and ipex
60+
RUN git clone https://github.com/intel/intel-extension-for-pytorch && cd intel-extension-for-pytorch && git checkout -b xpu_main origin/xpu-main
61+
RUN git clone https://github.com/pytorch/pytorch.git && cd pytorch && git checkout 209f2fa8ff86652f67d75c2f19bf9cb9942fd018 && git apply /usr/src/intel-extension-for-pytorch/torch_patches/00*.patch
62+
5863
# Install server
5964
COPY proto proto
6065
COPY server server
6166
COPY server/Makefile server/Makefile
6267
RUN cd server && \
6368
make gen-server && \
6469
pip install -r requirements_common.txt && \
65-
pip install ".[accelerate, peft]" --no-cache-dir
70+
pip install ".[accelerate, peft, outlines]" --no-cache-dir
71+
72+
ENV CCL_ROOT=/opt/intel/oneapi/ccl/2021.11
73+
ENV I_MPI_ROOT=/opt/intel/oneapi/mpi/2021.11
74+
ENV FI_PROVIDER_PATH=/opt/intel/oneapi/mpi/2021.11/opt/mpi/libfabric/lib/prov:/usr/lib/x86_64-linux-gnu/libfabric
75+
ENV DIAGUTIL_PATH=/opt/intel/oneapi/compiler/2024.0/etc/compiler/sys_check/sys_check.sh
76+
ENV CCL_CONFIGURATION=cpu_gpu_dpcpp
77+
ENV MANPATH=/opt/intel/oneapi/mpi/2021.11/share/man:/opt/intel/oneapi/mpi/2021.11/share/man:/opt/intel/oneapi/compiler/2024.0/documentation/en/man/common:
78+
ENV CMAKE_PREFIX_PATH=/opt/intel/oneapi/mkl/2024.0/lib/cmake:/opt/intel/oneapi/compiler/2024.0
79+
ENV CMPLR_ROOT=/opt/intel/oneapi/compiler/2024.0
80+
ENV LIBRARY_PATH=/opt/intel/oneapi/mpi/2021.11/lib:/opt/intel/oneapi/ccl/2021.11/lib/:/opt/intel/oneapi/mkl/2024.0/lib/:/opt/intel/oneapi/compiler/2024.0/lib
81+
ENV OCL_ICD_FILENAMES=libintelocl_emu.so:libalteracl.so:/opt/intel/oneapi/compiler/2024.0/lib/libintelocl.so
82+
ENV CLASSPATH=/opt/intel/oneapi/mpi/2021.11/share/java/mpi.jar:/opt/intel/oneapi/mpi/2021.11/share/java/mpi.jar
83+
ENV LD_LIBRARY_PATH=/opt/intel/oneapi/ccl/2021.11/lib/:/opt/intel/oneapi/mpi/2021.11/opt/mpi/libfabric/lib:/opt/intel/oneapi/mpi/2021.11/lib:/opt/intel/oneapi/mkl/2024.0/lib:/opt/intel/oneapi/compiler/2024.0/opt/compiler/lib:/opt/intel/oneapi/compiler/2024.0/lib:/opt/intel/oneapi/lib:/opt/intel/oneapi/lib/intel64:
84+
ENV MKLROOT=/opt/intel/oneapi/mkl/2024.0
85+
ENV NLSPATH=/opt/intel/oneapi/mkl/2024.0/share/locale/%l_%t/%N:/opt/intel/oneapi/compiler/2024.0/lib/locale/%l_%t/%N
86+
ENV PATH=/opt/intel/oneapi/mpi/2021.11/opt/mpi/libfabric/bin:/opt/intel/oneapi/mpi/2021.11/bin:/opt/intel/oneapi/mpi/2021.11/opt/mpi/libfabric/bin:/opt/intel/oneapi/mkl/2024.0/bin/:/opt/intel/oneapi/compiler/2024.0/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin
87+
ENV CPATH=/opt/intel/oneapi/mpi/2021.11/include:/opt/intel/oneapi/ccl/2021.11/include:/opt/intel/oneapi/mkl/2024.0/include
88+
ENV CCL_ZE_IPC_EXCHANGE=sockets
89+
90+
91+
RUN pip uninstall -y torch && cd pytorch && git submodule update --init --recursive && python setup.py install
92+
RUN pip uninstall -y intel-extension-for-pytorch && cd intel-extension-for-pytorch && git submodule update --init --recursive && USE_AOT_DEVLIST='pvc' BUILD_SEPARATE_OPS=ON BUILD_WITH_CPU=ON USE_XETLA=ON python setup.py install
6693

6794
# Install benchmarker
6895
COPY --from=builder /usr/src/target/release/text-generation-benchmark /usr/local/bin/text-generation-benchmark

server/text_generation_server/models/flash_mistral.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@
3636
SLIDING_WINDOW_BLOCKS: Optional[int] = None
3737
from text_generation_server.utils.import_utils import IS_XPU_SYSTEM
3838

39-
MEM_POOL = torch.cuda.graph_pool_handle()
39+
MEM_POOL = torch.cuda.graph_pool_handle() if torch.cuda.is_available() else None
4040

4141

4242
def set_sliding_window(sliding_window: int, sliding_window_blocks: int):
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
import torch
22
import os
33

4-
MEM_POOL = torch.cuda.graph_pool_handle()
4+
MEM_POOL = torch.cuda.graph_pool_handle() if torch.cuda.is_available() else None
55
# This is overridden by the cli
66
ENABLE_CUDA_GRAPHS = os.getenv("ENABLE_CUDA_GRAPHS", "false").lower() in {"1", "true"}

server/text_generation_server/utils/layers.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -713,11 +713,10 @@ def forward(self, hidden_states, residual=None):
713713
residual = hidden_states
714714
out = ipex.llm.modules.RMSNorm.apply(
715715
hidden_states,
716-
[hidden_states.size(-1)],
717716
self.weight,
718717
self.variance_epsilon,
719718
)
720-
return out[0], residual
719+
return out, residual
721720
elif hidden_states.shape[-1] > 8192:
722721
if residual is not None:
723722
hidden_states += residual

0 commit comments

Comments (0)