mirror of https://github.com/vllm-project/vllm
17 lines
470 B
Plaintext
ninja # For faster builds.
psutil
ray >= 2.9
sentencepiece # Required for LLaMA tokenizer.
numpy
torch == 2.1.2
transformers >= 4.38.0 # Required for Gemma.
xformers == 0.0.23.post1 # Required for CUDA 12.1.
fastapi
uvicorn[standard]
pydantic >= 2.0 # Required for OpenAI server.
prometheus_client >= 0.18.0
pynvml == 11.5.0
triton >= 2.1.0
outlines >= 0.0.27
cupy-cuda12x == 12.1.0 # Required for CUDA graphs. CUDA 11.8 users should install cupy-cuda11x instead.