!47575 modify moe expert weight normalization

Merge pull request !47575 from wangshengnan123/master
i-robot 2023-01-10 08:06:15 +00:00 committed by Gitee
commit d4c414dac0
1 changed file with 4 additions and 3 deletions


@@ -647,9 +647,10 @@ class TopkRouter(Cell):
                                                   self.on_value, self.off_value))
             accum_combine_tensor = self.add2(accum_combine_tensor, combine_tensor)
-        # expert weights normalization
-        combine_tensor_sum = self.reduce_sum_keep2(self.reduce_sum_keep2(accum_combine_tensor, -1), -2)
-        accum_combine_tensor = self.div2(accum_combine_tensor, self.add4(combine_tensor_sum, 1e-9))
+        # expert weights normalization when k > 1
+        if self.num_experts_chosen > 1:
+            combine_tensor_sum = self.reduce_sum_keep2(self.reduce_sum_keep2(accum_combine_tensor, -1), -2)
+            accum_combine_tensor = self.div2(accum_combine_tensor, self.add4(combine_tensor_sum, 1e-9))
         # dispatch_tensor is of boolean type. Here, using NotEqual instead of Cast, for that 'Cast to bool' has
         # bad performance
         dispatch_tensor = self.not_equal(accum_combine_tensor, 0.0)
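
In effect, the patch guards the expert-weight normalization so it only runs when each token selects more than one expert (k > 1). Below is a minimal NumPy sketch of that logic, not the MindSpore implementation; the tensor layout [tokens, experts, capacity] and the function name normalize_expert_weights are assumptions for illustration:

    import numpy as np

    def normalize_expert_weights(accum_combine_tensor, num_experts_chosen, eps=1e-9):
        """Rescale per-token routing weights so the k chosen weights sum to 1.

        accum_combine_tensor: float array, e.g. [tokens, experts, capacity]
        (hypothetical layout), holding the routing weights accumulated over
        each token's k chosen experts.
        """
        if num_experts_chosen > 1:
            # Sum over the expert and capacity axes with dims kept, mirroring
            # the two chained reduce_sum_keep2 calls in the diff: the result
            # is each token's total routing weight across its chosen experts.
            total = accum_combine_tensor.sum(axis=(-1, -2), keepdims=True)
            # The epsilon matches add4(combine_tensor_sum, 1e-9) in the diff
            # and protects tokens whose weights are all zero.
            accum_combine_tensor = accum_combine_tensor / (total + eps)
        return accum_combine_tensor

    # Token 0 routes to experts 1 and 3 with weights 0.6 and 0.3 (k = 2);
    # after normalization they become roughly 0.667 and 0.333.
    w = np.zeros((1, 4, 1))
    w[0, 1, 0], w[0, 3, 0] = 0.6, 0.3
    print(normalize_expert_weights(w, num_experts_chosen=2)[0, :, 0])

The k > 1 guard matters because with k == 1 dividing each token's single nonzero weight by its own sum would map it to exactly 1.0, discarding the router's confidence; skipping the division keeps the top-1 softmax probability intact, which is likely the motivation for this change.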