Suggest using dtype=half when OOM.

2024-01-13 04:33:29 +08:00 · 2024-01-13 04:33:29 +08:00 · cb7a1c1cbf
parent 7878958c0d
commit cb7a1c1cbf
1 changed file with 3 additions and 1 deletion
--- a/vllm/worker/worker.py
+++ b/vllm/worker/worker.py
@@ -239,4 +239,6 @@ def _check_if_gpu_supports_dtype(torch_dtype: torch.dtype):
            raise ValueError(
                "Bfloat16 is only supported on GPUs with compute capability "
                f"of at least 8.0. Your {gpu_name} GPU has compute capability "
-                f"{compute_capability[0]}.{compute_capability[1]}.")
+                f"{compute_capability[0]}.{compute_capability[1]}."
+                f" You can explicitly specify the data type by using the --dtype option, for example: --dtype=half."
+            )