diff --git a/requirements.txt b/requirements.txt
index dea25ac6d5..63a5e8aa62 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -6,8 +6,8 @@ pyarrow  # Required for Ray data.
 sentencepiece  # Required for LLaMA tokenizer.
 numpy
 torch == 2.0.1
-transformers >= 4.33.1  # Required for Code Llama.
-xformers == 0.0.22
+transformers >= 4.34.0  # Required for Mistral.
+xformers == 0.0.22  # Required for Mistral.
 fastapi
 uvicorn[standard]
 pydantic < 2  # Required for OpenAI server.
diff --git a/vllm/model_executor/models/__init__.py b/vllm/model_executor/models/__init__.py
index 01d85355b2..35d72c1630 100644
--- a/vllm/model_executor/models/__init__.py
+++ b/vllm/model_executor/models/__init__.py
@@ -9,10 +9,10 @@ from vllm.model_executor.models.gpt_j import GPTJForCausalLM
 from vllm.model_executor.models.gpt_neox import GPTNeoXForCausalLM
 from vllm.model_executor.models.internlm import InternLMForCausalLM
 from vllm.model_executor.models.llama import LlamaForCausalLM
+from vllm.model_executor.models.mistral import MistralForCausalLM
 from vllm.model_executor.models.mpt import MPTForCausalLM
 from vllm.model_executor.models.opt import OPTForCausalLM
 from vllm.model_executor.models.qwen import QWenLMHeadModel
-from vllm.model_executor.models.mistral import MistralForCausalLM
 
 __all__ = [
     "AquilaForCausalLM",
diff --git a/vllm/model_executor/models/mistral.py b/vllm/model_executor/models/mistral.py
index 746f21b585..896d49eedc 100644
--- a/vllm/model_executor/models/mistral.py
+++ b/vllm/model_executor/models/mistral.py
@@ -29,6 +29,7 @@ from typing import List, Optional, Tuple
 
 import torch
 from torch import nn
+from transformers import MistralConfig
 
 from vllm.model_executor.input_metadata import InputMetadata
 from vllm.model_executor.layers.activation import SiluAndMul
@@ -44,7 +45,6 @@ from vllm.model_executor.weight_utils import (
     convert_pyslice_to_tensor, hf_model_weights_iterator,
     load_tensor_parallel_weights, load_padded_tensor_parallel_vocab)
 from vllm.sequence import SamplerOutput
-from vllm.transformers_utils.configs.mistral import MistralConfig
 
 KVCache = Tuple[torch.Tensor, torch.Tensor]
 
diff --git a/vllm/transformers_utils/config.py b/vllm/transformers_utils/config.py
index a1efbedb68..fd5618bd81 100644
--- a/vllm/transformers_utils/config.py
+++ b/vllm/transformers_utils/config.py
@@ -17,15 +17,6 @@ _CONFIG_REGISTRY = {
 def get_config(model: str,
                trust_remote_code: bool,
                revision: Optional[str] = None) -> PretrainedConfig:
-    # NOTE: Because the Mistral model in HF hub does not have
-    # `configuration_mistral.py`, we cannot use `AutoConfig` to load the
-    # config. Instead, we use `MistralConfig` directly.
-    # NOTE: This is a hack. This does not work for local models.
-    # FIXME: Remove this once the Mistral model is available in the stable
-    # version of HF transformers.
-    if "mistral" in model.lower():
-        return MistralConfig.from_pretrained(model, revision=revision)
-
     try:
         config = AutoConfig.from_pretrained(
             model, trust_remote_code=trust_remote_code, revision=revision)
diff --git a/vllm/transformers_utils/configs/__init__.py b/vllm/transformers_utils/configs/__init__.py
index 3955c772b7..6611697d25 100644
--- a/vllm/transformers_utils/configs/__init__.py
+++ b/vllm/transformers_utils/configs/__init__.py
@@ -6,7 +6,6 @@ from vllm.transformers_utils.configs.qwen import QWenConfig
 # tiiuae/falcon-7b(-instruct) models. Newer Falcon models will use the
 # `FalconConfig` class from the official HuggingFace transformers library.
 from vllm.transformers_utils.configs.falcon import RWConfig
-from vllm.transformers_utils.configs.mistral import MistralConfig
 
 __all__ = [
     "MPTConfig",
@@ -14,5 +13,4 @@ __all__ = [
     "AquilaConfig",
     "QWenConfig",
     "RWConfig",
-    "MistralConfig",
 ]
diff --git a/vllm/transformers_utils/configs/mistral.py b/vllm/transformers_utils/configs/mistral.py
deleted file mode 100644
index 0a7d9a8efa..0000000000
--- a/vllm/transformers_utils/configs/mistral.py
+++ /dev/null
@@ -1,66 +0,0 @@
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-"""Mistral-7B-v0.1 configuration"""
-from transformers.configuration_utils import PretrainedConfig
-
-
-class MistralConfig(PretrainedConfig):
-    model_type = "mistral"
-    keys_to_ignore_at_inference = ["past_key_values"]
-
-    def __init__(
-        self,
-        vocab_size=32000,
-        hidden_size=4096,
-        intermediate_size=14336,
-        num_hidden_layers=32,
-        num_attention_heads=32,
-        num_key_value_heads=8,
-        hidden_act="silu",
-        max_position_embeddings=4096 * 32,
-        initializer_range=0.02,
-        rms_norm_eps=1e-6,
-        use_cache=True,
-        pad_token_id=None,
-        bos_token_id=1,
-        eos_token_id=2,
-        tie_word_embeddings=False,
-        rope_theta=10000.0,
-        sliding_window=4096,
-        **kwargs,
-    ):
-        self.vocab_size = vocab_size
-        self.max_position_embeddings = max_position_embeddings
-        self.hidden_size = hidden_size
-        self.intermediate_size = intermediate_size
-        self.num_hidden_layers = num_hidden_layers
-        self.num_attention_heads = num_attention_heads
-        self.sliding_window = sliding_window
-
-        # for backward compatibility
-        if num_key_value_heads is None:
-            num_key_value_heads = num_attention_heads
-
-        self.num_key_value_heads = num_key_value_heads
-        self.hidden_act = hidden_act
-        self.initializer_range = initializer_range
-        self.rms_norm_eps = rms_norm_eps
-        self.use_cache = use_cache
-        self.rope_theta = rope_theta
-
-        super().__init__(
-            pad_token_id=pad_token_id,
-            bos_token_id=bos_token_id,
-            eos_token_id=eos_token_id,
-            tie_word_embeddings=tie_word_embeddings,
-            **kwargs,
-        )
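
A quick sanity check for the vllm/transformers_utils/config.py change above
(a minimal sketch, assuming transformers >= 4.34.0 is installed and the
Hugging Face Hub is reachable; the model ID is illustrative):

    # Standalone check, not part of this diff: with Mistral upstreamed,
    # AutoConfig resolves model_type == "mistral" by itself, so the
    # "mistral" special case removed from get_config() is no longer needed.
    from transformers import AutoConfig, MistralConfig

    config = AutoConfig.from_pretrained("mistralai/Mistral-7B-v0.1")
    assert isinstance(config, MistralConfig)
    print(config.sliding_window)  # 4096 for Mistral-7B-v0.1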