mirror of https://github.com/vllm-project/vllm
Only emit warning about internal tokenizer if it isn't being used (#939)
commit e15932bb60
parent ce741ba3e4
@@ -25,7 +25,8 @@ def get_tokenizer(
                 "Cannot use the fast tokenizer in slow tokenizer mode.")
         kwargs["use_fast"] = False
 
-    if "llama" in tokenizer_name.lower() and kwargs.get("use_fast", True):
+    if ("llama" in tokenizer_name.lower() and kwargs.get("use_fast", True)
+            and tokenizer_name != _FAST_LLAMA_TOKENIZER):
         logger.info(
             "For some LLaMA-based models, initializing the fast tokenizer may "
             "take a long time. To eliminate the initialization time, consider "
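For context, here is a minimal sketch of how get_tokenizer() might read after this commit. Only the guarded condition and the truncated log message are taken from the diff, and the signature comes from the hunk header; the value of _FAST_LLAMA_TOKENIZER, the docstring, and the final AutoTokenizer call are assumptions about the surrounding code, not part of this change.

# A minimal sketch, not the verbatim vLLM source. Assumptions are marked.
import logging
from typing import Union

from transformers import (AutoTokenizer, PreTrainedTokenizer,
                          PreTrainedTokenizerFast)

logger = logging.getLogger(__name__)

# Constant name comes from the diff; this value is an assumption.
_FAST_LLAMA_TOKENIZER = "hf-internal-testing/llama-tokenizer"


def get_tokenizer(
    tokenizer_name: str,
    *args,
    tokenizer_mode: str = "auto",
    **kwargs,
) -> Union[PreTrainedTokenizer, PreTrainedTokenizerFast]:
    """Gets a tokenizer, emitting the fast-tokenizer hint only when useful."""
    if tokenizer_mode == "slow":
        if kwargs.get("use_fast", False):
            raise ValueError(
                "Cannot use the fast tokenizer in slow tokenizer mode.")
        kwargs["use_fast"] = False

    # The clause added by this commit: suppress the hint when the caller is
    # already loading the recommended fast LLaMA tokenizer.
    if ("llama" in tokenizer_name.lower() and kwargs.get("use_fast", True)
            and tokenizer_name != _FAST_LLAMA_TOKENIZER):
        logger.info(
            "For some LLaMA-based models, initializing the fast tokenizer "
            "may take a long time. To eliminate the initialization time, "
            "consider using '%s' instead of the original tokenizer.",
            _FAST_LLAMA_TOKENIZER)
    # Assumed tail of the function: delegate to HuggingFace.
    return AutoTokenizer.from_pretrained(tokenizer_name, *args, **kwargs)

The effect matches the commit title: previously the hint was logged for every LLaMA fast-tokenizer load, even when the caller had already switched to _FAST_LLAMA_TOKENIZER; the extra inequality check makes the message fire only when it is actionable.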