mirror of https://github.com/vllm-project/vllm
Update Dockerfile to build Megablocks (#2042)
This commit is contained in:
parent
6428f1d051
commit
3fefe271ec
18
Dockerfile
18
Dockerfile
|
@ -41,6 +41,14 @@ ENV NVCC_THREADS=$nvcc_threads
|
|||
|
||||
RUN python3 setup.py build_ext --inplace
|
||||
|
||||
# Build the megablocks library as wheel because it doesn't publish pre-built wheels.
|
||||
# https://github.com/stanford-futuredata/megablocks/commit/5897cd6f254b7b3edf7a708a3a3314ecb54b6f78
|
||||
RUN apt-get install -y git && \
|
||||
git clone https://github.com/stanford-futuredata/megablocks.git && \
|
||||
cd megablocks && \
|
||||
git checkout 5897cd6f254b7b3edf7a708a3a3314ecb54b6f78 && \
|
||||
MAX_JOBS=8 NVCC_THREADS=8 python3 setup.py bdist_wheel
|
||||
|
||||
# image to run unit testing suite
|
||||
FROM dev AS test
|
||||
|
||||
|
@ -73,12 +81,16 @@ ENTRYPOINT ["python3", "-m", "vllm.entrypoints.api_server"]
|
|||
|
||||
# openai api server alternative
|
||||
FROM vllm-base AS vllm-openai
|
||||
# install additional dependencies for openai api server, and mixtral
|
||||
# install additional dependencies for openai api server
|
||||
RUN --mount=type=cache,target=/root/.cache/pip \
|
||||
pip install accelerate megablocks
|
||||
pip install accelerate
|
||||
|
||||
COPY --from=build /workspace/vllm/*.so /workspace/vllm/
|
||||
COPY vllm vllm
|
||||
COPY --from=build /workspace/vllm/*.so /workspace/vllm/
|
||||
COPY --from=build /workspace/megablocks/dist/*.whl /tmp/
|
||||
RUN --mount=type=cache,target=/root/.cache/pip \
|
||||
pip install /tmp/megablocks-0.5.0-cp310-cp310-linux_x86_64.whl && \
|
||||
rm /tmp/megablocks-0.5.0-cp310-cp310-linux_x86_64.whl
|
||||
|
||||
ENTRYPOINT ["python3", "-m", "vllm.entrypoints.openai.api_server"]
|
||||
|
||||
|
|
Loading…
Reference in New Issue