mirror of https://github.com/vllm-project/vllm

[Bugfix] Bump transformers to 4.43.2 (#6752)

commit 421e218b37
parent 5448f67635
diff --git a/requirements-common.txt b/requirements-common.txt
@@ -6,7 +6,7 @@ numpy < 2.0.0
 requests
 tqdm
 py-cpuinfo
-transformers >= 4.43.1 # Required for Chameleon and Llama 3.1 hotfix.
+transformers >= 4.43.2 # Required for Chameleon and Llama 3.1 hotfix.
 tokenizers >= 0.19.1 # Required for Llama 3.
 fastapi
 aiohttp
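The pin itself is the whole fix here. To confirm a local environment actually
satisfies it, a minimal sketch using stdlib importlib.metadata and the
packaging library (this check is ours, not part of the commit):

    from importlib.metadata import version

    from packaging.version import Version

    # The bump requires transformers >= 4.43.2 for the Chameleon and
    # Llama 3.1 fixes; fail fast on an older install.
    installed = Version(version("transformers"))
    assert installed >= Version("4.43.2"), (
        f"transformers {installed} is too old; need >= 4.43.2")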
diff --git a/tests/test_config.py b/tests/test_config.py
@@ -64,8 +64,9 @@ def test_get_sliding_window():


 def test_rope_customization():
-    TEST_ROPE_SCALING = {"rope_type": "dynamic", "factor": 2.0}
+    TEST_ROPE_SCALING = {"type": "dynamic", "factor": 2.0}
     TEST_ROPE_THETA = 16_000_000.0
+    LONGCHAT_ROPE_SCALING = {"type": "linear", "factor": 8.0}

     llama_model_config = ModelConfig(
         "meta-llama/Meta-Llama-3-8B-Instruct",
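A note on the "rope_type" to "type" switch above: under transformers 4.43.x
the rope-scaling config is read with a fallback between the two spellings, so
both dicts name the same field. A minimal sketch of that lookup; the helper
name is ours, not a transformers API:

    def rope_scaling_key(rope_scaling: dict) -> str:
        # Mirrors the fallback applied when reading the config: prefer the
        # newer "rope_type" spelling, then the legacy "type".
        return rope_scaling.get("rope_type",
                                rope_scaling.get("type", "default"))

    assert rope_scaling_key({"type": "dynamic", "factor": 2.0}) == "dynamic"
    assert rope_scaling_key({"rope_type": "dynamic", "factor": 2.0}) == "dynamic"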
@@ -95,29 +96,29 @@ def test_rope_customization():
                    None) == TEST_ROPE_THETA
     assert llama_model_config.max_model_len == 16384

-    # TODO: add these back when the rope configs are fixed
-    # LONGCHAT_ROPE_SCALING = {"rope_type": "linear", "factor": 8.0}
-    # longchat_model_config = ModelConfig(
-    #     "lmsys/longchat-13b-16k",
-    #     "lmsys/longchat-13b-16k",
-    #     tokenizer_mode="auto",
-    #     trust_remote_code=False,
-    #     dtype="float16",
-    #     seed=0,
-    # )
-    # assert getattr(longchat_model_config.hf_config, "rope_scaling",
-    #                None) == LONGCHAT_ROPE_SCALING
-    # assert longchat_model_config.max_model_len == 16384
+    longchat_model_config = ModelConfig(
+        "lmsys/longchat-13b-16k",
+        "lmsys/longchat-13b-16k",
+        tokenizer_mode="auto",
+        trust_remote_code=False,
+        dtype="float16",
+        seed=0,
+    )
+    # Check if LONGCHAT_ROPE_SCALING entries are in longchat_model_config
+    assert all(
+        longchat_model_config.hf_config.rope_scaling.get(key) == value
+        for key, value in LONGCHAT_ROPE_SCALING.items())
+    assert longchat_model_config.max_model_len == 16384

-    # longchat_model_config = ModelConfig(
-    #     "lmsys/longchat-13b-16k",
-    #     "lmsys/longchat-13b-16k",
-    #     tokenizer_mode="auto",
-    #     trust_remote_code=False,
-    #     dtype="float16",
-    #     seed=0,
-    #     rope_scaling=TEST_ROPE_SCALING,
-    # )
-    # assert getattr(longchat_model_config.hf_config, "rope_scaling",
-    #                None) == TEST_ROPE_SCALING
-    # assert longchat_model_config.max_model_len == 4096
+    longchat_model_config = ModelConfig(
+        "lmsys/longchat-13b-16k",
+        "lmsys/longchat-13b-16k",
+        tokenizer_mode="auto",
+        trust_remote_code=False,
+        dtype="float16",
+        seed=0,
+        rope_scaling=TEST_ROPE_SCALING,
+    )
+    assert getattr(longchat_model_config.hf_config, "rope_scaling",
+                   None) == TEST_ROPE_SCALING
+    assert longchat_model_config.max_model_len == 4096
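The expected lengths in the re-enabled asserts follow from the scaling
factors. Assuming ModelConfig derives max_model_len as
max_position_embeddings * factor (the base lengths below, 8192 for Llama 3
and 2048 for the Llama-1-era longchat base, are model facts rather than
values from this diff), the arithmetic checks out:

    def expected_max_model_len(base_len: int, rope_scaling: dict) -> int:
        # Assumed rule: the rope-scaling factor stretches the usable context.
        return int(base_len * rope_scaling["factor"])

    # Meta-Llama-3-8B, dynamic factor 2.0: 8192 * 2 -> 16384
    assert expected_max_model_len(8192, {"type": "dynamic", "factor": 2.0}) == 16384
    # longchat-13b-16k ships linear factor 8.0 over 2048: -> 16384
    assert expected_max_model_len(2048, {"type": "linear", "factor": 8.0}) == 16384
    # Overriding longchat with TEST_ROPE_SCALING (factor 2.0): 2048 * 2 -> 4096
    assert expected_max_model_len(2048, {"type": "dynamic", "factor": 2.0}) == 4096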