# vLLM Dockerfile (mirror of https://github.com/vllm-project/vllm)
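#
# The `RUN --mount=type=cache` steps below require Docker BuildKit. A typical
# build of the serving image looks like this (the `vllm` image tag is only an
# example):
#
#   DOCKER_BUILDKIT=1 docker build --target vllm -t vllm .
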
FROM nvidia/cuda:12.1.0-devel-ubuntu22.04 AS dev

RUN apt-get update -y \
    && apt-get install -y python3-pip

WORKDIR /workspace

# install build and runtime dependencies
COPY requirements.txt requirements.txt
RUN --mount=type=cache,target=/root/.cache/pip \
    pip install -r requirements.txt

# install development dependencies
COPY requirements-dev.txt requirements-dev.txt
RUN --mount=type=cache,target=/root/.cache/pip \
    pip install -r requirements-dev.txt
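
# Example: build only the development image, e.g. for local hacking (the
# `vllm-dev` tag is illustrative):
#
#   DOCKER_BUILDKIT=1 docker build --target dev -t vllm-dev .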

# image to build pytorch extensions
FROM dev AS build

# install build dependencies
COPY requirements-build.txt requirements-build.txt
RUN --mount=type=cache,target=/root/.cache/pip \
    pip install -r requirements-build.txt

# copy input files
COPY csrc csrc
COPY setup.py setup.py
COPY requirements.txt requirements.txt
COPY pyproject.toml pyproject.toml
COPY vllm/__init__.py vllm/__init__.py

# max jobs used by Ninja to build extensions
ARG max_jobs
ENV MAX_JOBS=$max_jobs
RUN python3 setup.py build_ext --inplace
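
# `max_jobs` can be set at build time to limit Ninja parallelism, e.g. (the
# value 8 is just an example):
#
#   DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=8 --target build .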

# image to run unit testing suite
FROM dev AS test

# copy pytorch extensions separately to avoid having to rebuild
# when python code changes
COPY --from=build /workspace/vllm/*.so /workspace/vllm/
COPY tests tests
COPY vllm vllm

ENTRYPOINT ["python3", "-m", "pytest", "tests"]

# use CUDA base as CUDA runtime dependencies are already installed via pip
FROM nvidia/cuda:12.1.0-base-ubuntu22.04 AS vllm-base

# libnccl required for ray
RUN apt-get update -y \
    && apt-get install -y python3-pip

WORKDIR /workspace
COPY requirements.txt requirements.txt
RUN --mount=type=cache,target=/root/.cache/pip \
    pip install -r requirements.txt

FROM vllm-base AS vllm
COPY --from=build /workspace/vllm/*.so /workspace/vllm/
COPY vllm vllm

EXPOSE 8000
ENTRYPOINT ["python3", "-m", "vllm.entrypoints.api_server"]

# openai api server alternative
FROM vllm-base AS vllm-openai
# install additional dependencies for openai api server
RUN --mount=type=cache,target=/root/.cache/pip \
    pip install accelerate fschat

COPY --from=build /workspace/vllm/*.so /workspace/vllm/
COPY vllm vllm

ENTRYPOINT ["python3", "-m", "vllm.entrypoints.openai.api_server"]