mirror of https://github.com/vllm-project/vllm
[Misc] Define common requirements (#3841)
This commit is contained in:
parent
9edec652e2
commit
cfaf49a167
|
@ -49,7 +49,7 @@ jobs:
|
||||||
matrix:
|
matrix:
|
||||||
os: ['ubuntu-20.04']
|
os: ['ubuntu-20.04']
|
||||||
python-version: ['3.8', '3.9', '3.10', '3.11']
|
python-version: ['3.8', '3.9', '3.10', '3.11']
|
||||||
pytorch-version: ['2.2.1'] # Must be the most recent version that meets requirements.txt.
|
pytorch-version: ['2.2.1'] # Must be the most recent version that meets requirements-cuda.txt.
|
||||||
cuda-version: ['11.8', '12.1']
|
cuda-version: ['11.8', '12.1']
|
||||||
|
|
||||||
steps:
|
steps:
|
||||||
|
|
|
@ -9,7 +9,7 @@ LD_LIBRARY_PATH=${cuda_home}/lib64:$LD_LIBRARY_PATH
|
||||||
|
|
||||||
# Install requirements
|
# Install requirements
|
||||||
$python_executable -m pip install wheel packaging
|
$python_executable -m pip install wheel packaging
|
||||||
$python_executable -m pip install -r requirements.txt
|
$python_executable -m pip install -r requirements-cuda.txt
|
||||||
|
|
||||||
# Limit the number of parallel jobs to avoid OOM
|
# Limit the number of parallel jobs to avoid OOM
|
||||||
export MAX_JOBS=1
|
export MAX_JOBS=1
|
||||||
|
|
|
@ -21,7 +21,6 @@ Express your support on Twitter if vLLM aids you, or simply offer your appreciat
|
||||||
### Build from source
|
### Build from source
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
pip install -r requirements.txt
|
|
||||||
pip install -e . # This may take several minutes.
|
pip install -e . # This may take several minutes.
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|
|
@ -17,9 +17,10 @@ RUN ldconfig /usr/local/cuda-12.1/compat/
|
||||||
WORKDIR /workspace
|
WORKDIR /workspace
|
||||||
|
|
||||||
# install build and runtime dependencies
|
# install build and runtime dependencies
|
||||||
COPY requirements.txt requirements.txt
|
COPY requirements-common.txt requirements-common.txt
|
||||||
|
COPY requirements-cuda.txt requirements-cuda.txt
|
||||||
RUN --mount=type=cache,target=/root/.cache/pip \
|
RUN --mount=type=cache,target=/root/.cache/pip \
|
||||||
pip install -r requirements.txt
|
pip install -r requirements-cuda.txt
|
||||||
|
|
||||||
# install development dependencies
|
# install development dependencies
|
||||||
COPY requirements-dev.txt requirements-dev.txt
|
COPY requirements-dev.txt requirements-dev.txt
|
||||||
|
@ -51,7 +52,8 @@ COPY csrc csrc
|
||||||
COPY setup.py setup.py
|
COPY setup.py setup.py
|
||||||
COPY cmake cmake
|
COPY cmake cmake
|
||||||
COPY CMakeLists.txt CMakeLists.txt
|
COPY CMakeLists.txt CMakeLists.txt
|
||||||
COPY requirements.txt requirements.txt
|
COPY requirements-common.txt requirements-common.txt
|
||||||
|
COPY requirements-cuda.txt requirements-cuda.txt
|
||||||
COPY pyproject.toml pyproject.toml
|
COPY pyproject.toml pyproject.toml
|
||||||
COPY vllm vllm
|
COPY vllm vllm
|
||||||
|
|
||||||
|
|
|
@ -1,5 +1,6 @@
|
||||||
include LICENSE
|
include LICENSE
|
||||||
include requirements.txt
|
include requirements-common.txt
|
||||||
|
include requirements-cuda.txt
|
||||||
include CMakeLists.txt
|
include CMakeLists.txt
|
||||||
|
|
||||||
recursive-include cmake *
|
recursive-include cmake *
|
||||||
|
|
|
@ -1,20 +1,14 @@
|
||||||
cmake>=3.21
|
cmake >= 3.21
|
||||||
ninja # For faster builds.
|
ninja # For faster builds.
|
||||||
psutil
|
psutil
|
||||||
ray >= 2.9
|
|
||||||
sentencepiece # Required for LLaMA tokenizer.
|
sentencepiece # Required for LLaMA tokenizer.
|
||||||
numpy
|
numpy
|
||||||
torch == 2.2.1
|
|
||||||
requests
|
requests
|
||||||
py-cpuinfo
|
py-cpuinfo
|
||||||
transformers >= 4.39.1 # Required for StarCoder2 & Llava.
|
transformers >= 4.39.1 # Required for StarCoder2 & Llava.
|
||||||
xformers == 0.0.25 # Requires PyTorch 2.2.1.
|
|
||||||
fastapi
|
fastapi
|
||||||
uvicorn[standard]
|
uvicorn[standard]
|
||||||
pydantic >= 2.0 # Required for OpenAI server.
|
pydantic >= 2.0 # Required for OpenAI server.
|
||||||
prometheus_client >= 0.18.0
|
prometheus_client >= 0.18.0
|
||||||
pynvml == 11.5.0
|
|
||||||
triton >= 2.1.0
|
|
||||||
outlines == 0.0.34
|
|
||||||
tiktoken == 0.6.0 # Required for DBRX tokenizer
|
tiktoken == 0.6.0 # Required for DBRX tokenizer
|
||||||
vllm-nccl-cu12>=2.18,<2.19 # for downloading nccl library
|
outlines == 0.0.34 # Requires torch >= 2.1.0
|
|
@ -1,15 +1,6 @@
|
||||||
cmake>=3.21
|
# Common dependencies
|
||||||
ninja # For faster builds.
|
-r requirements-common.txt
|
||||||
psutil
|
|
||||||
ray >= 2.9
|
# Dependencies for x86_64 CPUs
|
||||||
sentencepiece # Required for LLaMA tokenizer.
|
|
||||||
numpy
|
|
||||||
transformers >= 4.38.0 # Required for Gemma.
|
|
||||||
fastapi
|
|
||||||
uvicorn[standard]
|
|
||||||
pydantic >= 2.0 # Required for OpenAI server.
|
|
||||||
prometheus_client >= 0.18.0
|
|
||||||
torch == 2.2.1+cpu
|
torch == 2.2.1+cpu
|
||||||
triton >= 2.1.0
|
triton >= 2.1.0 # FIXME(woosuk): This is a hack to avoid import error.
|
||||||
filelock == 3.13.3
|
|
||||||
py-cpuinfo
|
|
||||||
|
|
|
@ -0,0 +1,10 @@
|
||||||
|
# Common dependencies
|
||||||
|
-r requirements-common.txt
|
||||||
|
|
||||||
|
# Dependencies for NVIDIA GPUs
|
||||||
|
ray >= 2.9
|
||||||
|
pynvml == 11.5.0
|
||||||
|
vllm-nccl-cu12>=2.18,<2.19 # For downloading the NCCL library
|
||||||
|
torch == 2.2.1
|
||||||
|
xformers == 0.0.25 # Requires PyTorch 2.2.1
|
||||||
|
triton >= 2.1.0
|
|
@ -1,12 +1,7 @@
|
||||||
sentencepiece # Required for LLaMA tokenizer.
|
# Common dependencies
|
||||||
numpy
|
-r requirements-common.txt
|
||||||
|
|
||||||
|
# Dependencies for Neuron devices
|
||||||
transformers-neuronx >= 0.9.0
|
transformers-neuronx >= 0.9.0
|
||||||
torch-neuronx >= 2.1.0
|
torch-neuronx >= 2.1.0
|
||||||
neuronx-cc
|
neuronx-cc
|
||||||
fastapi
|
|
||||||
uvicorn[standard]
|
|
||||||
pydantic >= 2.0 # Required for OpenAI server.
|
|
||||||
prometheus_client >= 0.18.0
|
|
||||||
requests
|
|
||||||
psutil
|
|
||||||
py-cpuinfo
|
|
|
@ -1,18 +1,5 @@
|
||||||
cmake>=3.21
|
# Common dependencies
|
||||||
ninja # For faster builds.
|
-r requirements-common.txt
|
||||||
typing-extensions>=4.8.0
|
|
||||||
starlette
|
# Dependencies for AMD GPUs
|
||||||
requests
|
|
||||||
py-cpuinfo
|
|
||||||
psutil
|
|
||||||
ray == 2.9.3
|
ray == 2.9.3
|
||||||
sentencepiece # Required for LLaMA tokenizer.
|
|
||||||
numpy
|
|
||||||
tokenizers>=0.15.0
|
|
||||||
transformers >= 4.39.1 # Required for StarCoder2 & Llava.
|
|
||||||
fastapi
|
|
||||||
uvicorn[standard]
|
|
||||||
pydantic >= 2.0 # Required for OpenAI server.
|
|
||||||
prometheus_client >= 0.18.0
|
|
||||||
outlines == 0.0.34
|
|
||||||
tiktoken == 0.6.0 # Required for DBRX tokenizer
|
|
||||||
|
|
30
setup.py
30
setup.py
|
@ -325,32 +325,38 @@ def read_readme() -> str:
|
||||||
|
|
||||||
def get_requirements() -> List[str]:
|
def get_requirements() -> List[str]:
|
||||||
"""Get Python package dependencies from requirements.txt."""
|
"""Get Python package dependencies from the platform-specific requirements file, resolving nested "-r" includes."""
|
||||||
if _is_cuda():
|
|
||||||
with open(get_path("requirements.txt")) as f:
|
def _read_requirements(filename: str) -> List[str]:
|
||||||
|
with open(get_path(filename)) as f:
|
||||||
requirements = f.read().strip().split("\n")
|
requirements = f.read().strip().split("\n")
|
||||||
|
resolved_requirements = []
|
||||||
|
for line in requirements:
|
||||||
|
if line.startswith("-r "):
|
||||||
|
resolved_requirements += _read_requirements(line.split()[1])
|
||||||
|
else:
|
||||||
|
resolved_requirements.append(line)
|
||||||
|
return resolved_requirements
|
||||||
|
|
||||||
|
if _is_cuda():
|
||||||
|
requirements = _read_requirements("requirements-cuda.txt")
|
||||||
cuda_major = torch.version.cuda.split(".")[0]
|
cuda_major = torch.version.cuda.split(".")[0]
|
||||||
modified_requirements = []
|
modified_requirements = []
|
||||||
for req in requirements:
|
for req in requirements:
|
||||||
if "vllm-nccl-cu12" in req:
|
if "vllm-nccl-cu12" in req:
|
||||||
modified_requirements.append(
|
modified_requirements.append(
|
||||||
req.replace("vllm-nccl-cu12",
|
req.replace("vllm-nccl-cu12", f"vllm-nccl-cu{cuda_major}"))
|
||||||
f"vllm-nccl-cu{cuda_major}"))
|
|
||||||
else:
|
else:
|
||||||
modified_requirements.append(req)
|
modified_requirements.append(req)
|
||||||
requirements = modified_requirements
|
requirements = modified_requirements
|
||||||
elif _is_hip():
|
elif _is_hip():
|
||||||
with open(get_path("requirements-rocm.txt")) as f:
|
requirements = _read_requirements("requirements-rocm.txt")
|
||||||
requirements = f.read().strip().split("\n")
|
|
||||||
elif _is_neuron():
|
elif _is_neuron():
|
||||||
with open(get_path("requirements-neuron.txt")) as f:
|
requirements = _read_requirements("requirements-neuron.txt")
|
||||||
requirements = f.read().strip().split("\n")
|
|
||||||
elif _is_cpu():
|
elif _is_cpu():
|
||||||
with open(get_path("requirements-cpu.txt")) as f:
|
requirements = _read_requirements("requirements-cpu.txt")
|
||||||
requirements = f.read().strip().split("\n")
|
|
||||||
else:
|
else:
|
||||||
raise ValueError(
|
raise ValueError(
|
||||||
"Unsupported platform, please use CUDA, ROCM or Neuron.")
|
"Unsupported platform, please use CUDA, ROCm, Neuron, or CPU.")
|
||||||
|
|
||||||
return requirements
|
return requirements
|
||||||
|
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue