mirror of https://github.com/vllm-project/vllm
[Misc] Optional installation of audio related packages (#8063)
This commit is contained in:
parent
5231f0898e
commit
5b86b19954
|
@ -22,9 +22,7 @@ typing_extensions >= 4.10
|
|||
filelock >= 3.10.4 # filelock starts to support `mode` argument from 3.10.4
|
||||
pyzmq
|
||||
msgspec
|
||||
librosa # Required for audio processing
|
||||
soundfile # Required for audio processing
|
||||
gguf == 0.9.1
|
||||
importlib_metadata
|
||||
mistral_common >= 1.3.4
|
||||
pyyaml
|
||||
pyyaml
|
||||
|
|
|
@ -13,10 +13,12 @@ pytest-shard
|
|||
awscli
|
||||
einops # required for MPT, qwen-vl and Mamba
|
||||
httpx
|
||||
librosa # required for audio test
|
||||
peft
|
||||
requests
|
||||
ray
|
||||
sentence-transformers # required for embedding
|
||||
soundfile # required for audio test
|
||||
compressed-tensors==0.4.0 # required for compressed-tensors
|
||||
timm # required for internvl test
|
||||
transformers_stream_generator # required for qwen-vl test
|
||||
|
@ -30,4 +32,4 @@ aiohttp
|
|||
|
||||
# quantization
|
||||
bitsandbytes==0.42.0
|
||||
buildkite-test-collector==0.1.8
|
||||
buildkite-test-collector==0.1.8
|
||||
|
|
1
setup.py
1
setup.py
|
@ -501,6 +501,7 @@ setup(
|
|||
ext_modules=ext_modules,
|
||||
extras_require={
|
||||
"tensorizer": ["tensorizer>=2.9.0"],
|
||||
"audio": ["librosa", "soundfile"] # Required for audio processing
|
||||
},
|
||||
cmdclass={"build_ext": cmake_build_ext} if len(ext_modules) > 0 else {},
|
||||
package_data=package_data,
|
||||
|
|
|
@ -1,11 +1,9 @@
|
|||
from typing import List, Optional, Tuple, Type
|
||||
|
||||
import librosa
|
||||
import numpy as np
|
||||
import pytest
|
||||
from transformers import AutoModel, AutoTokenizer, BatchEncoding
|
||||
|
||||
from vllm.assets.audio import AudioAsset
|
||||
from vllm.sequence import SampleLogprobs
|
||||
from vllm.utils import STR_DTYPE_TO_TORCH_DTYPE
|
||||
|
||||
|
@ -21,6 +19,7 @@ AudioTuple = Tuple[np.ndarray, int]
|
|||
|
||||
@pytest.fixture(scope="session")
def audio_and_sample_rate():
    """Session-scoped fixture providing the "mary_had_lamb" audio asset.

    Returns whatever ``AudioAsset.audio_and_sample_rate`` yields for that
    asset (presumably an ``AudioTuple`` of samples and sample rate --
    confirm against ``vllm.assets.audio``).
    """
    # Imported inside the fixture rather than at module level; presumably so
    # that importing this test module does not pull in the optional audio
    # packages -- confirm against vllm.assets.audio.
    from vllm.assets.audio import AudioAsset

    asset = AudioAsset("mary_had_lamb")
    return asset.audio_and_sample_rate
|
||||
|
||||
|
||||
|
@ -109,6 +108,7 @@ def run_test(
|
|||
dtype=dtype,
|
||||
postprocess_inputs=process,
|
||||
auto_cls=AutoModel) as hf_model:
|
||||
import librosa
|
||||
|
||||
hf_outputs_per_audio = [
|
||||
hf_model.generate_greedy_logprobs_limit(
|
||||
|
|
|
@ -8,7 +8,6 @@ from functools import lru_cache
|
|||
from typing import (Iterable, List, Literal, Mapping, Optional, Tuple,
|
||||
TypedDict, Union, cast)
|
||||
|
||||
import librosa
|
||||
import numpy as np
|
||||
import torch
|
||||
import torch.utils.checkpoint
|
||||
|
@ -107,6 +106,11 @@ def input_mapper_for_ultravox(ctx: InputContext, data: object):
|
|||
feature_extractor = whisper_feature_extractor(ctx)
|
||||
|
||||
if sr != feature_extractor.sampling_rate:
|
||||
try:
|
||||
import librosa
|
||||
except ImportError:
|
||||
raise ImportError(
|
||||
"Please install vllm[audio] for audio support.") from None
|
||||
audio = librosa.resample(audio,
|
||||
orig_sr=sr,
|
||||
target_sr=feature_extractor.sampling_rate)
|
||||
|
|
|
@ -1,11 +1,9 @@
|
|||
import base64
|
||||
from functools import lru_cache
|
||||
from io import BytesIO
|
||||
from typing import List, Optional, Tuple, TypeVar, Union
|
||||
from typing import Any, List, Optional, Tuple, TypeVar, Union
|
||||
|
||||
import librosa
|
||||
import numpy as np
|
||||
import soundfile
|
||||
from PIL import Image
|
||||
|
||||
from vllm.connections import global_http_connection
|
||||
|
@ -73,10 +71,22 @@ async def async_fetch_image(image_url: str,
|
|||
return image.convert(image_mode)
|
||||
|
||||
|
||||
def try_import_audio_packages() -> Tuple[Any, Any]:
    """Import the optional audio dependencies on demand.

    ``librosa`` and ``soundfile`` are imported inside this function instead
    of at module import time, so callers only pay for (and only require)
    them when an audio code path is actually used.

    Returns:
        A ``(librosa, soundfile)`` tuple holding the two imported modules.

    Raises:
        ImportError: If either package cannot be imported. The message
            points at the ``vllm[audio]`` extra, and the original import
            error is preserved as ``__cause__``.
    """
    try:
        import librosa
        import soundfile
    except ImportError as exc:
        # Chain the original error instead of suppressing it with
        # ``from None``: the traceback then still shows which module was
        # missing (or why it failed to load), while callers keep catching
        # the same ImportError with the same message.
        raise ImportError(
            "Please install vllm[audio] for audio support.") from exc
    return librosa, soundfile
|
||||
|
||||
|
||||
def fetch_audio(audio_url: str) -> Tuple[np.ndarray, Union[int, float]]:
|
||||
"""
|
||||
Load audio from a URL.
|
||||
"""
|
||||
librosa, _ = try_import_audio_packages()
|
||||
|
||||
if audio_url.startswith("http"):
|
||||
audio_bytes = global_http_connection.get_bytes(
|
||||
audio_url, timeout=VLLM_AUDIO_FETCH_TIMEOUT)
|
||||
|
@ -95,6 +105,8 @@ async def async_fetch_audio(
|
|||
"""
|
||||
Asynchronously fetch audio from a URL.
|
||||
"""
|
||||
librosa, _ = try_import_audio_packages()
|
||||
|
||||
if audio_url.startswith("http"):
|
||||
audio_bytes = await global_http_connection.async_get_bytes(
|
||||
audio_url, timeout=VLLM_AUDIO_FETCH_TIMEOUT)
|
||||
|
@ -123,6 +135,8 @@ def encode_audio_base64(
|
|||
sampling_rate: int,
|
||||
) -> str:
|
||||
"""Encode audio as base64."""
|
||||
_, soundfile = try_import_audio_packages()
|
||||
|
||||
buffered = BytesIO()
|
||||
soundfile.write(buffered, audio, sampling_rate, format="WAV")
|
||||
|
||||
|
|
Loading…
Reference in New Issue