mirror of https://github.com/vllm-project/vllm
[Misc] Define common requirements (#3841)
This commit is contained in:
parent
9edec652e2
commit
cfaf49a167
|
@ -49,7 +49,7 @@ jobs:
|
|||
matrix:
|
||||
os: ['ubuntu-20.04']
|
||||
python-version: ['3.8', '3.9', '3.10', '3.11']
|
||||
pytorch-version: ['2.2.1'] # Must be the most recent version that meets requirements.txt.
|
||||
pytorch-version: ['2.2.1'] # Must be the most recent version that meets requirements-cuda.txt.
|
||||
cuda-version: ['11.8', '12.1']
|
||||
|
||||
steps:
|
||||
|
|
|
@ -9,7 +9,7 @@ LD_LIBRARY_PATH=${cuda_home}/lib64:$LD_LIBRARY_PATH
|
|||
|
||||
# Install requirements
|
||||
$python_executable -m pip install wheel packaging
|
||||
$python_executable -m pip install -r requirements.txt
|
||||
$python_executable -m pip install -r requirements-cuda.txt
|
||||
|
||||
# Limit the number of parallel jobs to avoid OOM
|
||||
export MAX_JOBS=1
|
||||
|
|
|
@ -21,7 +21,6 @@ Express your support on Twitter if vLLM aids you, or simply offer your appreciat
|
|||
### Build from source
|
||||
|
||||
```bash
|
||||
pip install -r requirements.txt
|
||||
pip install -e . # This may take several minutes.
|
||||
```
|
||||
|
||||
|
|
|
@ -17,9 +17,10 @@ RUN ldconfig /usr/local/cuda-12.1/compat/
|
|||
WORKDIR /workspace
|
||||
|
||||
# install build and runtime dependencies
|
||||
COPY requirements.txt requirements.txt
|
||||
COPY requirements-common.txt requirements-common.txt
|
||||
COPY requirements-cuda.txt requirements-cuda.txt
|
||||
RUN --mount=type=cache,target=/root/.cache/pip \
|
||||
pip install -r requirements.txt
|
||||
pip install -r requirements-cuda.txt
|
||||
|
||||
# install development dependencies
|
||||
COPY requirements-dev.txt requirements-dev.txt
|
||||
|
@ -51,7 +52,8 @@ COPY csrc csrc
|
|||
COPY setup.py setup.py
|
||||
COPY cmake cmake
|
||||
COPY CMakeLists.txt CMakeLists.txt
|
||||
COPY requirements.txt requirements.txt
|
||||
COPY requirements-common.txt requirements-common.txt
|
||||
COPY requirements-cuda.txt requirements-cuda.txt
|
||||
COPY pyproject.toml pyproject.toml
|
||||
COPY vllm vllm
|
||||
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
include LICENSE
|
||||
include requirements.txt
|
||||
include requirements-common.txt
|
||||
include requirements-cuda.txt
|
||||
include CMakeLists.txt
|
||||
|
||||
recursive-include cmake *
|
||||
|
|
|
@ -1,20 +1,14 @@
|
|||
cmake>=3.21
|
||||
cmake >= 3.21
|
||||
ninja # For faster builds.
|
||||
psutil
|
||||
ray >= 2.9
|
||||
sentencepiece # Required for LLaMA tokenizer.
|
||||
numpy
|
||||
torch == 2.2.1
|
||||
requests
|
||||
py-cpuinfo
|
||||
transformers >= 4.39.1 # Required for StarCoder2 & Llava.
|
||||
xformers == 0.0.25 # Requires PyTorch 2.2.1.
|
||||
fastapi
|
||||
uvicorn[standard]
|
||||
pydantic >= 2.0 # Required for OpenAI server.
|
||||
prometheus_client >= 0.18.0
|
||||
pynvml == 11.5.0
|
||||
triton >= 2.1.0
|
||||
outlines == 0.0.34
|
||||
tiktoken == 0.6.0 # Required for DBRX tokenizer
|
||||
vllm-nccl-cu12>=2.18,<2.19 # for downloading nccl library
|
||||
tiktoken == 0.6.0 # Required for DBRX tokenizer
|
||||
outlines == 0.0.34 # Requires torch >= 2.1.0
|
|
@ -1,15 +1,6 @@
|
|||
cmake>=3.21
|
||||
ninja # For faster builds.
|
||||
psutil
|
||||
ray >= 2.9
|
||||
sentencepiece # Required for LLaMA tokenizer.
|
||||
numpy
|
||||
transformers >= 4.38.0 # Required for Gemma.
|
||||
fastapi
|
||||
uvicorn[standard]
|
||||
pydantic >= 2.0 # Required for OpenAI server.
|
||||
prometheus_client >= 0.18.0
|
||||
torch == 2.2.1+cpu
|
||||
triton >= 2.1.0
|
||||
filelock == 3.13.3
|
||||
py-cpuinfo
|
||||
# Common dependencies
|
||||
-r requirements-common.txt
|
||||
|
||||
# Dependencies for x86_64 CPUs
|
||||
torch == 2.2.1+cpu
|
||||
triton >= 2.1.0 # FIXME(woosuk): This is a hack to avoid import error.
|
||||
|
|
|
@ -0,0 +1,10 @@
|
|||
# Common dependencies
|
||||
-r requirements-common.txt
|
||||
|
||||
# Dependencies for NVIDIA GPUs
|
||||
ray >= 2.9
|
||||
pynvml == 11.5.0
|
||||
vllm-nccl-cu12>=2.18,<2.19 # for downloading nccl library
|
||||
torch == 2.2.1
|
||||
xformers == 0.0.25 # Requires PyTorch 2.2.1
|
||||
triton >= 2.1.0
|
|
@ -1,12 +1,7 @@
|
|||
sentencepiece # Required for LLaMA tokenizer.
|
||||
numpy
|
||||
# Common dependencies
|
||||
-r requirements-common.txt
|
||||
|
||||
# Dependencies for Neuron devices
|
||||
transformers-neuronx >= 0.9.0
|
||||
torch-neuronx >= 2.1.0
|
||||
neuronx-cc
|
||||
fastapi
|
||||
uvicorn[standard]
|
||||
pydantic >= 2.0 # Required for OpenAI server.
|
||||
prometheus_client >= 0.18.0
|
||||
requests
|
||||
psutil
|
||||
py-cpuinfo
|
|
@ -1,18 +1,5 @@
|
|||
cmake>=3.21
|
||||
ninja # For faster builds.
|
||||
typing-extensions>=4.8.0
|
||||
starlette
|
||||
requests
|
||||
py-cpuinfo
|
||||
psutil
|
||||
# Common dependencies
|
||||
-r requirements-common.txt
|
||||
|
||||
# Dependencies for AMD GPUs
|
||||
ray == 2.9.3
|
||||
sentencepiece # Required for LLaMA tokenizer.
|
||||
numpy
|
||||
tokenizers>=0.15.0
|
||||
transformers >= 4.39.1 # Required for StarCoder2 & Llava.
|
||||
fastapi
|
||||
uvicorn[standard]
|
||||
pydantic >= 2.0 # Required for OpenAI server.
|
||||
prometheus_client >= 0.18.0
|
||||
outlines == 0.0.34
|
||||
tiktoken == 0.6.0 # Required for DBRX tokenizer
|
||||
|
|
46
setup.py
46
setup.py
|
@ -325,32 +325,38 @@ def read_readme() -> str:
|
|||
|
||||
def get_requirements() -> List[str]:
|
||||
"""Get Python package dependencies from the platform-specific requirements file."""
|
||||
|
||||
def _read_requirements(filename: str) -> List[str]:
|
||||
with open(get_path(filename)) as f:
|
||||
requirements = f.read().strip().split("\n")
|
||||
resolved_requirements = []
|
||||
for line in requirements:
|
||||
if line.startswith("-r "):
|
||||
resolved_requirements += _read_requirements(line.split()[1])
|
||||
else:
|
||||
resolved_requirements.append(line)
|
||||
return resolved_requirements
|
||||
|
||||
if _is_cuda():
|
||||
with open(get_path("requirements.txt")) as f:
|
||||
requirements = f.read().strip().split("\n")
|
||||
cuda_major = torch.version.cuda.split(".")[0]
|
||||
modified_requirements = []
|
||||
for req in requirements:
|
||||
if "vllm-nccl-cu12" in req:
|
||||
modified_requirements.append(
|
||||
req.replace("vllm-nccl-cu12",
|
||||
f"vllm-nccl-cu{cuda_major}"))
|
||||
else:
|
||||
modified_requirements.append(req)
|
||||
requirements = modified_requirements
|
||||
requirements = _read_requirements("requirements-cuda.txt")
|
||||
cuda_major = torch.version.cuda.split(".")[0]
|
||||
modified_requirements = []
|
||||
for req in requirements:
|
||||
if "vllm-nccl-cu12" in req:
|
||||
modified_requirements.append(
|
||||
req.replace("vllm-nccl-cu12", f"vllm-nccl-cu{cuda_major}"))
|
||||
else:
|
||||
modified_requirements.append(req)
|
||||
requirements = modified_requirements
|
||||
elif _is_hip():
|
||||
with open(get_path("requirements-rocm.txt")) as f:
|
||||
requirements = f.read().strip().split("\n")
|
||||
requirements = _read_requirements("requirements-rocm.txt")
|
||||
elif _is_neuron():
|
||||
with open(get_path("requirements-neuron.txt")) as f:
|
||||
requirements = f.read().strip().split("\n")
|
||||
requirements = _read_requirements("requirements-neuron.txt")
|
||||
elif _is_cpu():
|
||||
with open(get_path("requirements-cpu.txt")) as f:
|
||||
requirements = f.read().strip().split("\n")
|
||||
requirements = _read_requirements("requirements-cpu.txt")
|
||||
else:
|
||||
raise ValueError(
|
||||
"Unsupported platform, please use CUDA, ROCM or Neuron.")
|
||||
|
||||
"Unsupported platform, please use CUDA, ROCm, Neuron, or CPU.")
|
||||
return requirements
|
||||
|
||||
|
||||
|
|
Loading…
Reference in New Issue