[Bug] Fix the OOM condition for CPU cache (#260)

Zhuohan Li 2023-06-26 11:16:13 -07:00 committed by GitHub
parent 471a7a4566
commit 0b7db411b5
2 changed files with 3 additions and 1 deletion

@@ -128,7 +128,7 @@ class LLMEngine:
         logger.info(f'# GPU blocks: {num_gpu_blocks}, '
                     f'# CPU blocks: {num_cpu_blocks}')
-        if num_gpu_blocks <= 0 or num_cpu_blocks <= 0:
+        if num_gpu_blocks <= 0:
             raise ValueError("No available memory for the cache blocks. "
                              "Try increasing `gpu_memory_utilization` when "
                              "initializing the engine.")

@@ -113,6 +113,8 @@ class Worker:
         num_gpu_blocks = int((total_gpu_memory * gpu_memory_utilization
                               - peak_memory) // cache_block_size)
         num_cpu_blocks = int(cpu_swap_space // cache_block_size)
+        num_gpu_blocks = max(num_gpu_blocks, 0)
+        num_cpu_blocks = max(num_cpu_blocks, 0)
         torch.cuda.empty_cache()
         # Reset the seed to ensure that the random state is not affected by
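
The two clamps guard the subtraction above: if profiling's peak memory exceeds the `gpu_memory_utilization` budget, `num_gpu_blocks` comes out negative and would otherwise flow into cache allocation. A hedged sketch of that failure mode, with illustrative numbers only:

    # All values are hypothetical, chosen to force a negative block count.
    total_gpu_memory = 16 * 1024**3           # 16 GiB device (assumed)
    gpu_memory_utilization = 0.30             # engine capped at 30% of it
    peak_memory = 6 * 1024**3                 # profiled peak: 6 GiB (assumed)
    cache_block_size = 2 * 1024**2            # bytes per KV-cache block (assumed)
    cpu_swap_space = 4 * 1024**3              # 4 GiB of CPU swap (assumed)

    num_gpu_blocks = int((total_gpu_memory * gpu_memory_utilization
                          - peak_memory) // cache_block_size)
    num_cpu_blocks = int(cpu_swap_space // cache_block_size)
    print(num_gpu_blocks)  # -615: the 4.8 GiB budget is below the 6 GiB peak

    # The commit's clamp: a negative count becomes 0, so the LLMEngine check
    # raises a clear error instead of the cache allocating a negative size.
    num_gpu_blocks = max(num_gpu_blocks, 0)   # -> 0
    num_cpu_blocks = max(num_cpu_blocks, 0)   # unchanged: 2048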