[V1]Enable APC by default only for text models (#10148)

Signed-off-by: Roger Wang <ywang@roblox.com>
This commit is contained in:
Roger Wang 2024-11-08 06:39:41 -08:00 committed by GitHub
parent 1ff4aed5bd
commit 208ce622c7
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
1 changed file with 4 additions and 1 deletion

View File

@@ -65,7 +65,10 @@ class LLMEngine:
elif usage_context == UsageContext.OPENAI_API_SERVER:
scheduler_config.max_num_seqs = 1024
scheduler_config.max_num_batched_tokens = 2048
- cache_config.enable_prefix_caching = True
+ # TODO (ywang96): Enable APC by default when VLM supports it.
+ if not model_config.is_multimodal_model:
+ cache_config.enable_prefix_caching = True
logger.info(
"Initializing an LLM engine (v%s) with config: "