!48751 [MS] Fix Addcdiv and Addcmul GPU kernel bug

Merge pull request !48751 from jianghui58/ops_bugfix2
This commit is contained in:
i-robot 2023-02-11 09:42:53 +00:00 committed by Gitee
commit e70b601c47
No known key found for this signature in database
GPG Key ID: 173E9B9CA92EEF8F
2 changed files with 30 additions and 2 deletions

View File

@@ -589,7 +589,7 @@ void CalAddcdiv(const std::vector<int64_t> &input_data_dims, const std::vector<i
inp *= input_data_dims[j];
x1_ *= x1_dims[j];
x2_ *= x2_dims[j];
-v *= output_dims[j];
+v *= value_dims[j];
}
output_broadcast_used[i] = o;
input_data_broadcast_used[i] = inp;
@@ -609,6 +609,20 @@ void CalAddcdiv(const std::vector<int64_t> &input_data_dims, const std::vector<i
} else if (size_value == 1) {
Addcdiv_all_same_value1<<<CUDA_BLOCKS(device_id, size), CUDA_THREADS(device_id), 0, stream>>>(
input_data, x1, x2, value, output, size);
} else {
Addcdiv<<<CUDA_BLOCKS(device_id, size), CUDA_THREADS(device_id), 0, stream>>>(
input_data_dims[0], input_data_dims[1], input_data_dims[2], input_data_dims[3], input_data_dims[4],
input_data_dims[5], input_data_dims[6], x1_dims[0], x1_dims[1], x1_dims[2], x1_dims[3], x1_dims[4], x1_dims[5],
x1_dims[6], x2_dims[0], x2_dims[1], x2_dims[2], x2_dims[3], x2_dims[4], x2_dims[5], x2_dims[6], value_dims[0],
value_dims[1], value_dims[2], value_dims[3], value_dims[4], value_dims[5], value_dims[6], output_dims[0],
output_dims[1], output_dims[2], output_dims[3], output_dims[4], output_dims[5], output_dims[6],
output_broadcast_used[0], output_broadcast_used[1], output_broadcast_used[2], output_broadcast_used[3],
output_broadcast_used[4], input_data_broadcast_used[0], input_data_broadcast_used[1],
input_data_broadcast_used[2], input_data_broadcast_used[3], input_data_broadcast_used[4], x1_broadcast_used[0],
x1_broadcast_used[1], x1_broadcast_used[2], x1_broadcast_used[3], x1_broadcast_used[4], x2_broadcast_used[0],
x2_broadcast_used[1], x2_broadcast_used[2], x2_broadcast_used[3], x2_broadcast_used[4], value_broadcast_used[0],
value_broadcast_used[1], value_broadcast_used[2], value_broadcast_used[3], value_broadcast_used[4], input_data,
x1, x2, value, output, size);
}
} else if (size_value == 1) {
Addcdiv_value1<<<CUDA_BLOCKS(device_id, size), CUDA_THREADS(device_id), 0, stream>>>(

View File

@@ -588,7 +588,7 @@ void CalAddcmul(const std::vector<int64_t> &input_data_dims, const std::vector<i
inp *= input_data_dims[j];
x1_ *= x1_dims[j];
x2_ *= x2_dims[j];
-v *= output_dims[j];
+v *= value_dims[j];
}
output_broadcast_used[i] = o;
input_data_broadcast_used[i] = inp;
@@ -608,6 +608,20 @@ void CalAddcmul(const std::vector<int64_t> &input_data_dims, const std::vector<i
} else if (size_value == 1) {
Addcmul_all_same_value1<<<CUDA_BLOCKS(device_id, size), CUDA_THREADS(device_id), 0, stream>>>(
input_data, x1, x2, value, output, size);
} else {
Addcmul<<<CUDA_BLOCKS(device_id, size), CUDA_THREADS(device_id), 0, stream>>>(
input_data_dims[0], input_data_dims[1], input_data_dims[2], input_data_dims[3], input_data_dims[4],
input_data_dims[5], input_data_dims[6], x1_dims[0], x1_dims[1], x1_dims[2], x1_dims[3], x1_dims[4], x1_dims[5],
x1_dims[6], x2_dims[0], x2_dims[1], x2_dims[2], x2_dims[3], x2_dims[4], x2_dims[5], x2_dims[6], value_dims[0],
value_dims[1], value_dims[2], value_dims[3], value_dims[4], value_dims[5], value_dims[6], output_dims[0],
output_dims[1], output_dims[2], output_dims[3], output_dims[4], output_dims[5], output_dims[6],
output_broadcast_used[0], output_broadcast_used[1], output_broadcast_used[2], output_broadcast_used[3],
output_broadcast_used[4], input_data_broadcast_used[0], input_data_broadcast_used[1],
input_data_broadcast_used[2], input_data_broadcast_used[3], input_data_broadcast_used[4], x1_broadcast_used[0],
x1_broadcast_used[1], x1_broadcast_used[2], x1_broadcast_used[3], x1_broadcast_used[4], x2_broadcast_used[0],
x2_broadcast_used[1], x2_broadcast_used[2], x2_broadcast_used[3], x2_broadcast_used[4], value_broadcast_used[0],
value_broadcast_used[1], value_broadcast_used[2], value_broadcast_used[3], value_broadcast_used[4], input_data,
x1, x2, value, output, size);
}
} else if (size_value == 1) {
Addcmul_value1<<<CUDA_BLOCKS(device_id, size), CUDA_THREADS(device_id), 0, stream>>>(