From 4934d492744d14104353b8236ef8a0405edf1622 Mon Sep 17 00:00:00 2001
From: Jong-hun Shin <20063100+dalgarak@users.noreply.github.com>
Date: Sun, 31 Dec 2023 01:42:04 +0900
Subject: [PATCH] Support GPT-NeoX Models without attention biases (#2301)

---
 vllm/model_executor/models/gpt_neox.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/vllm/model_executor/models/gpt_neox.py b/vllm/model_executor/models/gpt_neox.py
index 1e340162dd..773fed36a9 100644
--- a/vllm/model_executor/models/gpt_neox.py
+++ b/vllm/model_executor/models/gpt_neox.py
@@ -54,6 +54,7 @@ class GPTNeoXAttention(nn.Module):
         self.total_num_heads = config.num_attention_heads
         self.hidden_size = config.hidden_size
         self.head_size = self.hidden_size // self.total_num_heads
+        self.bias = getattr(config, "attention_bias", True)
 
         tensor_model_parallel_world_size = (
             get_tensor_model_parallel_world_size())
@@ -65,11 +66,13 @@ class GPTNeoXAttention(nn.Module):
             config.hidden_size,
             self.head_size,
             self.total_num_heads,
+            bias=self.bias,
             linear_method=linear_method,
         )
         self.dense = RowParallelLinear(
             config.hidden_size,
             config.hidden_size,
+            bias=self.bias,
             linear_method=linear_method,
         )
         scaling = self.head_size**-0.5
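
For context, a minimal sketch of the config-fallback pattern the patch relies on: `getattr(config, "attention_bias", True)` preserves the original biased behavior for older GPT-NeoX configs that lack the field, while honoring `attention_bias=False` when a checkpoint's config sets it. In the sketch below, `SimpleNamespace` stands in for a Hugging Face `GPTNeoXConfig`, and plain `torch.nn.Linear` stands in for vLLM's `QKVParallelLinear` and `RowParallelLinear`; the helper function is hypothetical and only illustrates the flag plumbing, not the actual vLLM implementation.

    # Hypothetical sketch of the attention_bias fallback used in the patch.
    from types import SimpleNamespace

    import torch.nn as nn


    def build_attention_projections(config):
        # Default to True so configs without the field keep their biases,
        # matching the original GPT-NeoX behavior.
        bias = getattr(config, "attention_bias", True)
        qkv = nn.Linear(config.hidden_size, 3 * config.hidden_size, bias=bias)
        dense = nn.Linear(config.hidden_size, config.hidden_size, bias=bias)
        return qkv, dense


    # Config that omits attention_bias -> bias parameters are created
    # (backward compatible with existing GPT-NeoX checkpoints).
    old_cfg = SimpleNamespace(hidden_size=64)
    qkv, dense = build_attention_projections(old_cfg)
    assert qkv.bias is not None and dense.bias is not None

    # Config that disables attention_bias -> no bias parameters are allocated.
    new_cfg = SimpleNamespace(hidden_size=64, attention_bias=False)
    qkv, dense = build_attention_projections(new_cfg)
    assert qkv.bias is None and dense.bias is None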