Support GPT-NeoX Models without attention biases (#2301)

Jong-hun Shin 2023-12-31 01:42:04 +09:00 committed by GitHub
parent 358c328d69
commit 4934d49274
1 changed file with 3 additions and 0 deletions

@@ -54,6 +54,7 @@ class GPTNeoXAttention(nn.Module):
         self.total_num_heads = config.num_attention_heads
         self.hidden_size = config.hidden_size
         self.head_size = self.hidden_size // self.total_num_heads
+        self.bias = getattr(config, "attention_bias", True)
         tensor_model_parallel_world_size = (
             get_tensor_model_parallel_world_size())
@@ -65,11 +66,13 @@ class GPTNeoXAttention(nn.Module):
             config.hidden_size,
             self.head_size,
             self.total_num_heads,
+            bias=self.bias,
             linear_method=linear_method,
         )
         self.dense = RowParallelLinear(
             config.hidden_size,
             config.hidden_size,
+            bias=self.bias,
             linear_method=linear_method,
         )
         scaling = self.head_size**-0.5
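
The change reads an optional attention_bias flag from the Hugging Face config: checkpoints trained without biases on the attention projections set attention_bias=False, while older GPT-NeoX configs omit the attribute, so getattr defaults to True and preserves the previous behavior. The sketch below illustrates the same pattern outside of vLLM; plain torch.nn.Linear stands in for QKVParallelLinear / RowParallelLinear, and SimpleNamespace stands in for the config object, so names and shapes here are illustrative only.

# Minimal sketch of the bias-toggle pattern, not vLLM code.
from types import SimpleNamespace

import torch.nn as nn


class NeoXAttentionProjections(nn.Module):
    def __init__(self, config):
        super().__init__()
        hidden_size = config.hidden_size
        # Older GPT-NeoX configs have no `attention_bias` attribute, so the
        # getattr default of True keeps existing checkpoints working unchanged.
        bias = getattr(config, "attention_bias", True)
        # Fused query/key/value projection and output projection, with the
        # bias terms toggled by the config flag.
        self.query_key_value = nn.Linear(hidden_size, 3 * hidden_size, bias=bias)
        self.dense = nn.Linear(hidden_size, hidden_size, bias=bias)


# A checkpoint exported without attention biases carries attention_bias=False.
cfg = SimpleNamespace(hidden_size=256, attention_bias=False)
attn = NeoXAttentionProjections(cfg)
assert attn.query_key_value.bias is None and attn.dense.bias is None

With this default, configs that set attention_bias=False now build bias-free projection layers instead of expecting bias parameters that do not exist in the checkpoint, while all other GPT-NeoX models load exactly as before.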