forked from mindspore-Ecosystem/mindspore
!17641 [MSLITE] fix the random failure of a model when running in the arm64_fp16 case
From: @probiotics_53 Reviewed-by: @zhanghaibo5,@hangangqiang Signed-off-by: @hangangqiang
This commit is contained in:
commit
9641b0c6a1
|
@ -235,7 +235,7 @@
|
|||
const int *perm = transpose_param->perm_; \
|
||||
const int *strides = transpose_param->strides_; \
|
||||
const int *out_strides = transpose_param->out_strides_; \
|
||||
int data_size = transpose_param->data_size_; \
|
||||
int data_size = transpose_param->data_num_ * sizeof(TYPE); \
|
||||
int num_axes = transpose_param->num_axes_; \
|
||||
bool needTranspose = false; \
|
||||
for (int i = 1; i < num_axes; ++i) { \
|
||||
|
|
|
@ -209,7 +209,7 @@ int DoTransposeFp16(const float16_t *in_data, float16_t *out_data, const int *ou
|
|||
int *perm = param->perm_;
|
||||
int *strides = param->strides_;
|
||||
int *out_strides = param->out_strides_;
|
||||
int data_size = param->data_size_;
|
||||
int data_size = param->data_num_ * sizeof(float16_t);
|
||||
int num_axes = param->num_axes_;
|
||||
|
||||
// check if transpose is needed
|
||||
|
|
|
@ -295,7 +295,7 @@ static void GetTransposeParameter(TransposeParameter *param, const int in_shape[
|
|||
param->perm_[i] = perm[i];
|
||||
shape_size *= perm[i]; // check overflow
|
||||
}
|
||||
param->data_size_ = (int)(shape_size * sizeof(float)); // check overflow
|
||||
param->data_num_ = (int)shape_size; // check overflow
|
||||
param->strides_[param->num_axes_ - 1] = 1;
|
||||
param->out_strides_[param->num_axes_ - 1] = 1;
|
||||
for (int i = param->num_axes_ - 2; i >= 0; i--) {
|
||||
|
|
|
@ -208,7 +208,7 @@ int DoTransposeFp32(const float *in_data, float *out_data, const int *output_sha
|
|||
int *perm = transpose_param->perm_;
|
||||
int *strides = transpose_param->strides_;
|
||||
int *out_strides = transpose_param->out_strides_;
|
||||
int data_size = transpose_param->data_size_;
|
||||
int data_size = transpose_param->data_num_ * sizeof(float);
|
||||
int num_axes = transpose_param->num_axes_;
|
||||
|
||||
// check if transpose is needed
|
||||
|
|
|
@ -193,7 +193,7 @@ int DoTransposeInt8(const int8_t *in_data, int8_t *out_data, const int *output_s
|
|||
}
|
||||
|
||||
if (!needTranspose) {
|
||||
(void)memcpy(out_data, in_data, transpose_param->data_size_);
|
||||
(void)memcpy(out_data, in_data, transpose_param->data_num_ * sizeof(int8_t));
|
||||
return NNACL_OK;
|
||||
}
|
||||
|
||||
|
|
|
@ -36,7 +36,7 @@ typedef struct TransposeParameter {
|
|||
|
||||
// other parameter
|
||||
int num_axes_;
|
||||
int data_size_;
|
||||
int data_num_;
|
||||
} TransposeParameter;
|
||||
|
||||
#endif // MINDSPORE_NNACL_TRANSPOSE_H_
|
||||
|
|
|
@ -80,7 +80,7 @@ void TransposeCPUFwdKernel::LaunchKernel(const std::vector<AddressPtr> &inputs,
|
|||
const std::vector<AddressPtr> &outputs) {
|
||||
const auto *input_addr = reinterpret_cast<T *>(inputs[0]->addr);
|
||||
auto *output_addr = reinterpret_cast<T *>(outputs[0]->addr);
|
||||
transpose_param_.data_size_ = IntToSize(inputs[0]->size);
|
||||
transpose_param_.data_num_ = inputs[0]->size / sizeof(T);
|
||||
int output_shape[SizeToInt(output_shape_.size())];
|
||||
for (size_t i = 0; i < output_shape_.size(); ++i) {
|
||||
output_shape[i] = SizeToInt(output_shape_[i]);
|
||||
|
|
|
@ -42,7 +42,7 @@ int TransposeFp32Coder::Resize() {
|
|||
auto out_shape = output_tensor_->shape();
|
||||
param_->strides_[param_->num_axes_ - 1] = 1;
|
||||
param_->out_strides_[param_->num_axes_ - 1] = 1;
|
||||
param_->data_size_ = input_tensor_->Size();
|
||||
param_->data_num_ = input_tensor_->ElementsNum();
|
||||
for (int i = param_->num_axes_ - 2; i >= 0; i--) {
|
||||
param_->strides_[i] = in_shape.at(i + 1) * param_->strides_[i + 1];
|
||||
param_->out_strides_[i] = out_shape.at(i + 1) * param_->out_strides_[i + 1];
|
||||
|
|
|
@ -105,7 +105,7 @@ void NNaclFp32Serializer::CodeStruct(const std::string &name, const TransposePar
|
|||
CodeBaseStruct<false>(
|
||||
"TransposeParameter", name, transpose_parameter.op_parameter_, ToString(transpose_parameter.perm_),
|
||||
transpose_parameter.perm_size_, transpose_parameter.conjugate_, ToString(transpose_parameter.strides_),
|
||||
ToString(transpose_parameter.out_strides_), transpose_parameter.num_axes_, transpose_parameter.data_size_);
|
||||
ToString(transpose_parameter.out_strides_), transpose_parameter.num_axes_, transpose_parameter.data_num_);
|
||||
}
|
||||
|
||||
void NNaclFp32Serializer::CodeStruct(const std::string &name, const LstmParameter &lstm_parameter) {
|
||||
|
|
|
@ -62,7 +62,7 @@ int TransposeCPUKernel::ReSize() {
|
|||
auto out_shape = outTensor->shape();
|
||||
param_->strides_[param_->num_axes_ - 1] = 1;
|
||||
param_->out_strides_[param_->num_axes_ - 1] = 1;
|
||||
param_->data_size_ = inTensor->Size();
|
||||
param_->data_num_ = inTensor->ElementsNum();
|
||||
for (int i = param_->num_axes_ - 2; i >= 0; i--) {
|
||||
param_->strides_[i] = in_shape.at(i + 1) * param_->strides_[i + 1];
|
||||
param_->out_strides_[i] = out_shape.at(i + 1) * param_->out_strides_[i + 1];
|
||||
|
|
|
@ -47,7 +47,7 @@ int TransposeInt8CPUKernel::ReSize() {
|
|||
auto in_shape = in_tensor->shape();
|
||||
auto out_shape = out_tensor->shape();
|
||||
|
||||
transpose_param_->data_size_ = in_tensor->Size();
|
||||
transpose_param_->data_num_ = in_tensor->ElementsNum();
|
||||
|
||||
// get perm data
|
||||
auto perm_tensor = in_tensors_.at(1);
|
||||
|
|
|
@ -126,7 +126,7 @@ TEST_F(TestTransposeFp32, TransposeFp32_axes4) { /* 1x2x3x4 */
|
|||
}
|
||||
param->num_axes_ = 4;
|
||||
param->conjugate_ = false;
|
||||
param->data_size_ = 24 * sizeof(float);
|
||||
param->data_num_ = 24;
|
||||
for (int i = 0; i < 8; i++) {
|
||||
param->perm_[i] = perm[i];
|
||||
param->strides_[i] = strides[i];
|
||||
|
@ -158,7 +158,7 @@ TEST_F(TestTransposeFp32, TransposeFp32_axes3) { /* 2x3x4 */
|
|||
}
|
||||
param->num_axes_ = 3;
|
||||
param->conjugate_ = false;
|
||||
param->data_size_ = 24 * sizeof(float);
|
||||
param->data_num_ = 24;
|
||||
for (int i = 0; i < 8; i++) {
|
||||
param->perm_[i] = perm[i];
|
||||
param->strides_[i] = strides[i];
|
||||
|
@ -190,7 +190,7 @@ TEST_F(TestTransposeFp32, TransposeFp32_axes2) { /* 6x4 */
|
|||
}
|
||||
param->num_axes_ = 2;
|
||||
param->conjugate_ = false;
|
||||
param->data_size_ = 24 * sizeof(float);
|
||||
param->data_num_ = 24;
|
||||
for (int i = 0; i < 8; i++) {
|
||||
param->perm_[i] = perm[i];
|
||||
param->strides_[i] = strides[i];
|
||||
|
|
Loading…
Reference in New Issue