mirror of https://github.com/vllm-project/vllm
Suggest using dtype=half when OOM.
This commit is contained in:
parent
7878958c0d
commit
cb7a1c1cbf
|
@@ -239,4 +239,6 @@ def _check_if_gpu_supports_dtype(torch_dtype: torch.dtype):
|
|||
raise ValueError(
|
||||
"Bfloat16 is only supported on GPUs with compute capability "
|
||||
f"of at least 8.0. Your {gpu_name} GPU has compute capability "
|
||||
f"{compute_capability[0]}.{compute_capability[1]}.")
|
||||
f"{compute_capability[0]}.{compute_capability[1]}."
|
||||
f" You can explicitly specify the data type by using the --dtype option, for example: --dtype=half."
|
||||
)
|
||||
|
|
Loading…
Reference in New Issue