From 9734f2a88e19d45082cea5b7a4b471ee4fcb614c Mon Sep 17 00:00:00 2001 From: sunsuodong Date: Thu, 20 Aug 2020 19:47:24 +0800 Subject: [PATCH] batch_norm_fp16 --- mindspore/lite/nnacl/fp16/batchnorm_fp16.c | 52 +++++++ mindspore/lite/nnacl/fp16/batchnorm_fp16.h | 37 +++++ mindspore/lite/nnacl/fp32/batchnorm.c | 42 +++-- mindspore/lite/nnacl/fp32/batchnorm.h | 10 +- .../runtime/kernel/arm/fp16/batchnorm_fp16.cc | 87 +++++++++++ .../runtime/kernel/arm/fp16/batchnorm_fp16.h | 36 +++++ .../kernel/arm/fp16/fused_batchnorm_fp16.cc | 103 +++++++++++++ .../kernel/arm/fp16/fused_batchnorm_fp16.h | 36 +++++ .../src/runtime/kernel/arm/fp32/batchnorm.cc | 129 ++++++---------- .../src/runtime/kernel/arm/fp32/batchnorm.h | 24 +-- .../kernel/arm/fp32/fused_batchnorm.cc | 143 ++++-------------- .../runtime/kernel/arm/fp32/fused_batchnorm.h | 31 ++-- .../kernel/arm/fp32/batchnorm_fp32_tests.cc | 125 ++++++--------- 13 files changed, 538 insertions(+), 317 deletions(-) create mode 100644 mindspore/lite/nnacl/fp16/batchnorm_fp16.c create mode 100644 mindspore/lite/nnacl/fp16/batchnorm_fp16.h create mode 100644 mindspore/lite/src/runtime/kernel/arm/fp16/batchnorm_fp16.cc create mode 100644 mindspore/lite/src/runtime/kernel/arm/fp16/batchnorm_fp16.h create mode 100644 mindspore/lite/src/runtime/kernel/arm/fp16/fused_batchnorm_fp16.cc create mode 100644 mindspore/lite/src/runtime/kernel/arm/fp16/fused_batchnorm_fp16.h diff --git a/mindspore/lite/nnacl/fp16/batchnorm_fp16.c b/mindspore/lite/nnacl/fp16/batchnorm_fp16.c new file mode 100644 index 00000000000..baa9e6dfdf1 --- /dev/null +++ b/mindspore/lite/nnacl/fp16/batchnorm_fp16.c @@ -0,0 +1,52 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+#include "nnacl/fp16/batchnorm_fp16.h"
+#include <math.h>
+
+void BatchNormFp16(const void *input, const void *mean, const void *variance,
+                   BatchNormParameter *param, int task_id, void *output) {
+  int units_per_thread = UP_DIV(param->unit_, param->op_parameter_.thread_num_);
+  int completed_units = task_id * units_per_thread;
+  int cur_unit = MSMIN(units_per_thread, param->unit_ - completed_units);
+  int cur_offset = completed_units * param->channel_;
+
+  for (int i = 0; i < cur_unit; i++) {
+    for (int c = 0; c < param->channel_; c++) {
+      float16_t variance_sqrt = sqrt(((const float16_t *)variance)[c] + param->epsilon_);
+      ((float16_t *)output)[cur_offset + c] =
+        (((const float16_t *)input)[cur_offset + c] - ((const float16_t *)mean)[c]) / variance_sqrt;
+    }
+    cur_offset += param->channel_;
+  }
+}
+
+void FusedBatchNormFp16(const void *input, const void *scale, const void *offset, const void *mean,
+                        const void *variance, BatchNormParameter *param, int task_id, void *output) {
+  int units_per_thread = UP_DIV(param->unit_, param->op_parameter_.thread_num_);
+  int completed_units = task_id * units_per_thread;
+  int cur_unit = MSMIN(units_per_thread, param->unit_ - completed_units);
+  int cur_offset = completed_units * param->channel_;
+
+  for (int i = 0; i < cur_unit; i++) {
+    for (int c = 0; c < param->channel_; c++) {
+      float16_t variance_sqrt = sqrt(((const float16_t *)variance)[c] + param->epsilon_);
+      float16_t norm_val = (((const float16_t *)input)[cur_offset + c] - ((const float16_t *)mean)[c]) / variance_sqrt;
+      ((float16_t *)output)[cur_offset + c] = norm_val * ((const float16_t *)scale)[c] + ((const float16_t *)offset)[c];
+    }
+    cur_offset += param->channel_;
+  }
+}
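These kernels split work by rows (unit_) rather than by channel as the old fp32 code did, so each task normalizes a contiguous block of the NHWC tensor. A minimal sketch of how a caller could drive BatchNormFp16 across task ids, with hypothetical sizes and a serial loop standing in for the runtime's parallel launch:

  /* Sketch only: assumes UP_DIV/MSMIN from nnacl/op_base.h and a 4x3 tensor. */
  BatchNormParameter param;
  param.op_parameter_.thread_num_ = 2;  /* two tasks */
  param.unit_ = 4;                      /* N * H * W rows */
  param.channel_ = 3;
  param.epsilon_ = 1e-3f;

  float16_t in[12], mean[3], variance[3], out[12];
  /* ... fill in, mean, variance ... */
  for (int task_id = 0; task_id < param.op_parameter_.thread_num_; ++task_id) {
    /* each task covers UP_DIV(4, 2) = 2 rows starting at task_id * 2 */
    BatchNormFp16(in, mean, variance, &param, task_id, out);
  }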
diff --git a/mindspore/lite/nnacl/fp16/batchnorm_fp16.h b/mindspore/lite/nnacl/fp16/batchnorm_fp16.h
new file mode 100644
index 00000000000..673bcd46fae
--- /dev/null
+++ b/mindspore/lite/nnacl/fp16/batchnorm_fp16.h
@@ -0,0 +1,37 @@
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_NNACL_FP16_BATCHNORM_FP16_H_
+#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_NNACL_FP16_BATCHNORM_FP16_H_
+
+#ifdef ENABLE_NEON
+#include <arm_neon.h>
+#endif
+#include "nnacl/batchnorm_parameter.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+void BatchNormFp16(const void *input, const void *mean, const void *variance, BatchNormParameter *param, int task_id,
+                   void *output);
+void FusedBatchNormFp16(const void *input, const void *scale, const void *offset, const void *mean,
+                        const void *variance, BatchNormParameter *param, int task_id, void *output);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif  // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_NNACL_FP16_BATCHNORM_FP16_H_
diff --git a/mindspore/lite/nnacl/fp32/batchnorm.c b/mindspore/lite/nnacl/fp32/batchnorm.c
index 22c4cd668d2..5efde546ce5 100644
--- a/mindspore/lite/nnacl/fp32/batchnorm.c
+++ b/mindspore/lite/nnacl/fp32/batchnorm.c
@@ -15,26 +15,42 @@
  */
 
 #include "nnacl/fp32/batchnorm.h"
+#include "nnacl/fp16/batchnorm_fp16.h"
 #include <math.h>
 #include "nnacl/batchnorm_parameter.h"
+#include "nnacl/op_base.h"
+#include "nnacl/errorcode.h"
 
-void BatchNorm(float *output_ptr, const float *input_ptr, const float *mean_ptr, const float *variance_ptr, int task_id,
-               BatchNormParameter *param) {
-  for (int c = task_id; c < param->channel_; c += param->op_parameter_.thread_num_) {
-    float variance_sqrt = sqrt(variance_ptr[c] + param->epsilon_);
-    for (int u = 0; u < param->unit_; u++) {
-      output_ptr[u * param->channel_ + c] = (input_ptr[u * param->channel_ + c] - mean_ptr[c]) / variance_sqrt;
+void BatchNormFp32(const void *input, const void *mean, const void *variance,
+                   BatchNormParameter *param, int task_id, void *output) {
+  int units_per_thread = UP_DIV(param->unit_, param->op_parameter_.thread_num_);
+  int completed_units = task_id * units_per_thread;
+  int cur_unit = MSMIN(units_per_thread, param->unit_ - completed_units);
+  int cur_offset = completed_units * param->channel_;
+
+  for (int i = 0; i < cur_unit; i++) {
+    for (int c = 0; c < param->channel_; c++) {
+      float variance_sqrt = sqrt(((const float *)variance)[c] + param->epsilon_);
+      ((float *)output)[cur_offset + c] =
+        (((const float *)input)[cur_offset + c] - ((const float *)mean)[c]) / variance_sqrt;
     }
+    cur_offset += param->channel_;
   }
 }
 
-void FusedBatchNorm(float *output_ptr, const float *input_ptr, const float *scale_ptr, const float *offest_ptr,
-                    const float *mean_ptr, const float *variance_ptr, int task_id, BatchNormParameter *param) {
-  for (int c = task_id; c < param->channel_; c += param->op_parameter_.thread_num_) {
-    float variance_sqrt = sqrt(variance_ptr[c] + param->epsilon_);
-    for (int u = 0; u < param->unit_; u++) {
-      output_ptr[u * param->channel_ + c] =
-        (input_ptr[u * param->channel_ + c] - mean_ptr[c]) / variance_sqrt * scale_ptr[c] + offest_ptr[c];
+void FusedBatchNormFp32(const void *input, const void *scale, const void *offset, const void *mean,
+                        const void *variance, BatchNormParameter *param, int task_id, void *output) {
+  int units_per_thread = UP_DIV(param->unit_, param->op_parameter_.thread_num_);
+  int completed_units = task_id * units_per_thread;
+  int cur_unit = MSMIN(units_per_thread, param->unit_ - completed_units);
+  int cur_offset = completed_units * param->channel_;
+
+  for (int i = 0; i < cur_unit; i++) {
+    for (int c = 0; c < param->channel_; c++) {
+      float variance_sqrt = sqrt(((const float *)variance)[c] + param->epsilon_);
+      float norm_val = (((const float *)input)[cur_offset + c] - ((const float *)mean)[c]) / variance_sqrt;
+      ((float *)output)[cur_offset + c] = norm_val * ((const float *)scale)[c] + ((const float *)offset)[c];
     }
+    cur_offset += param->channel_;
   }
 }
diff --git a/mindspore/lite/nnacl/fp32/batchnorm.h b/mindspore/lite/nnacl/fp32/batchnorm.h
index 5d5bc4e3879..84b675c8db0 100644
--- a/mindspore/lite/nnacl/fp32/batchnorm.h
+++ b/mindspore/lite/nnacl/fp32/batchnorm.h
@@ -17,18 +17,16 @@
 #ifndef MINDSPORE_LITE_NNACL_FP32_BATCHNORM_H_
 #define MINDSPORE_LITE_NNACL_FP32_BATCHNORM_H_
 
-#include "nnacl/op_base.h"
 #include "nnacl/batchnorm_parameter.h"
 
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-void BatchNorm(float *output_ptr, const float *input_ptr, const float *mean_ptr, const float *variance_ptr, int task_id,
-               BatchNormParameter *param);
-
-void FusedBatchNorm(float *output_ptr, const float *input_ptr, const float *scale_ptr, const float *offest_ptr,
-                    const float *mean_ptr, const float *variance_ptr, int task_id, BatchNormParameter *param);
+void BatchNormFp32(const void *input, const void *mean, const void *variance, BatchNormParameter *param, int task_id,
+                   void *output);
+void FusedBatchNormFp32(const void *input, const void *scale, const void *offset, const void *mean,
+                        const void *variance, BatchNormParameter *param, int task_id, void *output);
 
 #ifdef __cplusplus
 }
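The fused variant folds scale and offset into the same pass: out = (x - mean) / sqrt(var + epsilon) * scale + offset. A standalone check of that arithmetic for a single element, with values chosen purely for illustration:

  #include <math.h>
  #include <stdio.h>

  int main(void) {
    float x = 1.0f, mean = 0.5f, var = 0.25f, eps = 1e-3f;
    float scale = 2.0f, offset = 1.0f;
    float norm = (x - mean) / sqrtf(var + eps); /* ~0.998 */
    printf("%f\n", norm * scale + offset);      /* ~2.996 */
    return 0;
  }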
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/batchnorm_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/batchnorm_fp16.cc
new file mode 100644
index 00000000000..6de61af026c
--- /dev/null
+++ b/mindspore/lite/src/runtime/kernel/arm/fp16/batchnorm_fp16.cc
@@ -0,0 +1,87 @@
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "src/runtime/kernel/arm/fp16/batchnorm_fp16.h"
+#include "nnacl/fp16/batchnorm_fp16.h"
+#include "nnacl/fp16/cast_fp16.h"
+#include "src/kernel_registry.h"
+
+using mindspore::lite::KernelRegistrar;
+using mindspore::lite::RET_ERROR;
+using mindspore::lite::RET_OK;
+using mindspore::schema::PrimitiveType_BatchNorm;
+
+namespace mindspore::kernel {
+int BatchnormFp16CPUKernel::DoExecute(int task_id) {
+  auto param = reinterpret_cast<BatchNormParameter *>(op_parameter_);
+
+  if (in_tensors_.at(0)->data_type() == kNumberTypeFloat32) {
+    auto input = in_tensors_.at(0);
+    auto mean = in_tensors_.at(1);
+    auto variance = in_tensors_.at(2);
+    auto output = out_tensors_.at(0);
+
+    auto input_fp16 = context_->allocator->Malloc(input->ElementsNum() * sizeof(float16_t));
+    auto mean_fp16 = context_->allocator->Malloc(mean->ElementsNum() * sizeof(float16_t));
+    auto variance_fp16 = context_->allocator->Malloc(variance->ElementsNum() * sizeof(float16_t));
+    auto output_fp16 = context_->allocator->Malloc(output->ElementsNum() * sizeof(float16_t));
+    if (input_fp16 == nullptr || mean_fp16 == nullptr || variance_fp16 == nullptr || output_fp16 == nullptr) {
+      context_->allocator->Free(input_fp16);
+      context_->allocator->Free(mean_fp16);
+      context_->allocator->Free(variance_fp16);
+      context_->allocator->Free(output_fp16);
+      return RET_ERROR;  // bail out instead of touching the null buffers
+    }
+    Float32ToFloat16(reinterpret_cast<float *>(input->Data()),
+                     reinterpret_cast<float16_t *>(input_fp16), input->ElementsNum());
+    Float32ToFloat16(reinterpret_cast<float *>(mean->Data()),
+                     reinterpret_cast<float16_t *>(mean_fp16), mean->ElementsNum());
+    Float32ToFloat16(reinterpret_cast<float *>(variance->Data()),
+                     reinterpret_cast<float16_t *>(variance_fp16), variance->ElementsNum());
+
+    BatchNormFp16(input_fp16, mean_fp16, variance_fp16, param, task_id, output_fp16);
+
+    Float16ToFloat32(reinterpret_cast<float16_t *>(output_fp16), reinterpret_cast<float *>(output->Data()),
+                     output->ElementsNum());
+    context_->allocator->Free(input_fp16);
+    context_->allocator->Free(mean_fp16);
+    context_->allocator->Free(variance_fp16);
+    context_->allocator->Free(output_fp16);
+    return mindspore::lite::RET_OK;
+  }
+  BatchNormFp16(in_tensors_.at(0)->Data(), mean_, variance_, param, task_id, out_tensors_.at(0)->Data());
+  return mindspore::lite::RET_OK;
+}
+
+kernel::LiteKernel *CpuBatchnormFp16KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs,
+                                                  const std::vector<lite::tensor::Tensor *> &outputs,
+                                                  OpParameter *opParameter, const lite::Context *ctx,
+                                                  const kernel::KernelKey &desc,
+                                                  const mindspore::lite::PrimitiveC *primitive) {
+  auto *kernel = new (std::nothrow) BatchnormFp16CPUKernel(opParameter, inputs, outputs, ctx, primitive);
+  if (kernel == nullptr) {
+    MS_LOG(ERROR) << "new BatchnormFp16CPUKernel fail!";
+    return nullptr;
+  }
+  auto ret = kernel->Init();
+  if (ret != RET_OK) {
+    MS_LOG(ERROR) << "Init kernel failed, name: " << opParameter->name_ << ", type: "
+                  << schema::EnumNamePrimitiveType(static_cast<schema::PrimitiveType>(opParameter->type_));
+    delete kernel;
+    return nullptr;
+  }
+  return kernel;
+}
+
+// REG_KERNEL(kCPU, kNumberTypeFloat16, PrimitiveType_BatchNorm, CpuBatchnormFp16KernelCreator)
+}  // namespace mindspore::kernel
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/batchnorm_fp16.h b/mindspore/lite/src/runtime/kernel/arm/fp16/batchnorm_fp16.h
new file mode 100644
index 00000000000..eb493fc086f
--- /dev/null
+++ b/mindspore/lite/src/runtime/kernel/arm/fp16/batchnorm_fp16.h
@@ -0,0 +1,36 @@
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP16_BATCHNORM_FP16_H_
+#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP16_BATCHNORM_FP16_H_
+
+#include <vector>
+#include "src/runtime/kernel/arm/fp32/batchnorm.h"
+
+namespace mindspore::kernel {
+class BatchnormFp16CPUKernel : public BatchnormCPUKernel {
+ public:
+  BatchnormFp16CPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs,
+                         const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx,
+                         const mindspore::lite::PrimitiveC *primitive)
+      : BatchnormCPUKernel(parameter, inputs, outputs, ctx, primitive) {}
+  virtual ~BatchnormFp16CPUKernel() {}
+
+  virtual int DoExecute(int task_id);
+};
+}  // namespace mindspore::kernel
+
+#endif  // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP16_BATCHNORM_FP16_H_
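When the FP16 kernels receive FP32 tensors, DoExecute converts at the boundaries and runs the math in half precision; the fused kernel in the next file follows the same pattern. The round trip reduced to its essentials (sketch only; buffers and param are assumed to be set up as in the earlier example):

  /* Illustrative convert-compute-convert shape of the FP32 input path. */
  float in_fp32[12], out_fp32[12];
  float16_t in_fp16[12], mean_fp16[3], var_fp16[3], out_fp16[12];
  /* ... mean_fp16/var_fp16 already demoted, param filled ... */
  Float32ToFloat16(in_fp32, in_fp16, 12);   /* demote inputs  */
  BatchNormFp16(in_fp16, mean_fp16, var_fp16, &param, 0, out_fp16);
  Float16ToFloat32(out_fp16, out_fp32, 12); /* promote result */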
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/fused_batchnorm_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/fused_batchnorm_fp16.cc
new file mode 100644
index 00000000000..4e097b8aa47
--- /dev/null
+++ b/mindspore/lite/src/runtime/kernel/arm/fp16/fused_batchnorm_fp16.cc
@@ -0,0 +1,103 @@
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "src/runtime/kernel/arm/fp16/fused_batchnorm_fp16.h"
+#include "nnacl/fp16/batchnorm_fp16.h"
+#include "nnacl/fp16/cast_fp16.h"
+#include "src/kernel_registry.h"
+
+using mindspore::lite::KernelRegistrar;
+using mindspore::lite::RET_ERROR;
+using mindspore::lite::RET_OK;
+using mindspore::schema::PrimitiveType_FusedBatchNorm;
+
+namespace mindspore::kernel {
+int FusedBatchnormFp16CPUKernel::DoExecute(int task_id) {
+  auto param = reinterpret_cast<BatchNormParameter *>(op_parameter_);
+
+  if (in_tensors_.at(0)->data_type() == kNumberTypeFloat32) {
+    auto input = in_tensors_.at(0);
+    auto scale = in_tensors_.at(1);
+    auto offset = in_tensors_.at(2);
+    auto mean = in_tensors_.at(3);
+    auto variance = in_tensors_.at(4);
+    auto output = out_tensors_.at(0);
+
+    auto input_fp16 = context_->allocator->Malloc(input->ElementsNum() * sizeof(float16_t));
+    auto scale_fp16 = context_->allocator->Malloc(scale->ElementsNum() * sizeof(float16_t));
+    auto offset_fp16 = context_->allocator->Malloc(offset->ElementsNum() * sizeof(float16_t));
+    auto mean_fp16 = context_->allocator->Malloc(mean->ElementsNum() * sizeof(float16_t));
+    auto variance_fp16 = context_->allocator->Malloc(variance->ElementsNum() * sizeof(float16_t));
+    auto output_fp16 = context_->allocator->Malloc(output->ElementsNum() * sizeof(float16_t));
+    if (input_fp16 == nullptr || scale_fp16 == nullptr || offset_fp16 == nullptr ||
+        mean_fp16 == nullptr || variance_fp16 == nullptr || output_fp16 == nullptr) {
+      context_->allocator->Free(input_fp16);
+      context_->allocator->Free(scale_fp16);
+      context_->allocator->Free(offset_fp16);
+      context_->allocator->Free(mean_fp16);
+      context_->allocator->Free(variance_fp16);
+      context_->allocator->Free(output_fp16);
+      return RET_ERROR;  // bail out instead of touching the null buffers
+    }
+    Float32ToFloat16(reinterpret_cast<float *>(input->Data()),
+                     reinterpret_cast<float16_t *>(input_fp16), input->ElementsNum());
+    Float32ToFloat16(reinterpret_cast<float *>(scale->Data()),
+                     reinterpret_cast<float16_t *>(scale_fp16), scale->ElementsNum());
+    Float32ToFloat16(reinterpret_cast<float *>(offset->Data()),
+                     reinterpret_cast<float16_t *>(offset_fp16), offset->ElementsNum());
+    Float32ToFloat16(reinterpret_cast<float *>(mean->Data()),
+                     reinterpret_cast<float16_t *>(mean_fp16), mean->ElementsNum());
+    Float32ToFloat16(reinterpret_cast<float *>(variance->Data()),
+                     reinterpret_cast<float16_t *>(variance_fp16), variance->ElementsNum());
+
+    FusedBatchNormFp16(input_fp16, scale_fp16, offset_fp16, mean_fp16, variance_fp16, param, task_id,
+                       output_fp16);
+
+    Float16ToFloat32(reinterpret_cast<float16_t *>(output_fp16), reinterpret_cast<float *>(output->Data()),
+                     output->ElementsNum());
+    context_->allocator->Free(input_fp16);
+    context_->allocator->Free(scale_fp16);
+    context_->allocator->Free(offset_fp16);
+    context_->allocator->Free(mean_fp16);
+    context_->allocator->Free(variance_fp16);
+    context_->allocator->Free(output_fp16);
+    return mindspore::lite::RET_OK;
+  }
+  FusedBatchNormFp16(in_tensors_.at(0)->Data(), scale_, offset_, mean_, variance_, param, task_id,
+                     out_tensors_.at(0)->Data());
+  return mindspore::lite::RET_OK;
+}
+
+kernel::LiteKernel *CpuFusedBatchnormFp16KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs,
+                                                       const std::vector<lite::tensor::Tensor *> &outputs,
+                                                       OpParameter *op_parameter, const lite::Context *ctx,
+                                                       const kernel::KernelKey &desc,
+                                                       const mindspore::lite::PrimitiveC *primitive) {
+  FusedBatchnormFp16CPUKernel *kernel =
+      new (std::nothrow) FusedBatchnormFp16CPUKernel(op_parameter, inputs, outputs, ctx, primitive);
+  if (kernel == nullptr) {
+    MS_LOG(ERROR) << "new FusedBatchnormFp16CPUKernel fail!";
+    return nullptr;
+  }
+  auto ret = kernel->Init();
+  if (ret != RET_OK) {
+    delete kernel;
+    MS_LOG(ERROR) << "Init kernel failed, name: " << op_parameter->name_ << ", type: "
+                  << schema::EnumNamePrimitiveType(static_cast<schema::PrimitiveType>(op_parameter->type_));
+    return nullptr;
+  }
+  return kernel;
+}
+
+// REG_KERNEL(kCPU, kNumberTypeFloat16, PrimitiveType_FusedBatchNorm, CpuFusedBatchnormFp16KernelCreator)
+}  // namespace mindspore::kernel
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/fused_batchnorm_fp16.h b/mindspore/lite/src/runtime/kernel/arm/fp16/fused_batchnorm_fp16.h
new file mode 100644
index 00000000000..ab12c1eb121
--- /dev/null
+++ b/mindspore/lite/src/runtime/kernel/arm/fp16/fused_batchnorm_fp16.h
@@ -0,0 +1,36 @@
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP16_FUSED_BATCHNORM_FP16_H_
+#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP16_FUSED_BATCHNORM_FP16_H_
+
+#include <vector>
+#include "src/runtime/kernel/arm/fp32/fused_batchnorm.h"
+
+namespace mindspore::kernel {
+class FusedBatchnormFp16CPUKernel : public FusedBatchnormCPUKernel {
+ public:
+  FusedBatchnormFp16CPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs,
+                              const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx,
+                              const mindspore::lite::PrimitiveC *primitive)
+      : FusedBatchnormCPUKernel(parameter, inputs, outputs, ctx, primitive) {}
+  virtual ~FusedBatchnormFp16CPUKernel() {}
+
+  virtual int DoExecute(int task_id);
+};
+}  // namespace mindspore::kernel
+
+#endif  // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP16_FUSED_BATCHNORM_FP16_H_
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/batchnorm.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/batchnorm.cc
index 7aa01a96950..6bfa90c763f 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/batchnorm.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/batchnorm.cc
@@ -15,50 +15,12 @@
  */
 
 #include "src/runtime/kernel/arm/fp32/batchnorm.h"
-#include "schema/model_generated.h"
 #include "src/kernel_registry.h"
-#include "include/errorcode.h"
-#include "src/runtime/runtime_api.h"
-#include "nnacl/batchnorm_parameter.h"
-#include "nnacl/fp32/batchnorm.h"
 
-using mindspore::kernel::KERNEL_ARCH::kCPU;
 using mindspore::lite::KernelRegistrar;
-using mindspore::lite::RET_ERROR;
-using mindspore::lite::RET_OK;
 using mindspore::schema::PrimitiveType_BatchNorm;
 
 namespace mindspore::kernel {
-BatchnormCPUKernel::~BatchnormCPUKernel() {
-  if (mean_addr_ != nullptr) {
-    free(mean_addr_);
-    mean_addr_ = nullptr;
-  }
-  if (var_addr_ != nullptr) {
-    free(var_addr_);
-    var_addr_ = nullptr;
-  }
-}
-
-int BatchnormCPUKernel::InitConstTensor() {
-  auto mean = in_tensors_[1];
-  mean_addr_ = reinterpret_cast<float *>(malloc(mean->ElementsNum() * sizeof(float)));
-  if (mean_addr_ == nullptr) {
-    MS_LOG(ERROR) << "Malloc buffer failed.";
-    return RET_ERROR;
-  }
-  memcpy(mean_addr_, mean->Data(), mean->ElementsNum() * sizeof(float));
-
-  auto variance = in_tensors_[2];
-  var_addr_ = reinterpret_cast<float *>(malloc(variance->ElementsNum() * sizeof(float)));
-  if (var_addr_ == nullptr) {
-    MS_LOG(ERROR) << "Malloc buffer failed.";
-    return RET_ERROR;
-  }
-  memcpy(var_addr_, variance->Data(), variance->ElementsNum() * sizeof(float));
-  return RET_OK;
-}
-
 int BatchnormCPUKernel::Init() {
   if (!InferShapeDone()) {
     return RET_OK;
@@ -67,62 +29,72 @@ int BatchnormCPUKernel::Init() {
 }
 
 int BatchnormCPUKernel::ReSize() {
-  if (mean_addr_ != nullptr) {
-    free(mean_addr_);
-    mean_addr_ = nullptr;
+  FreeMeanAndVariance();
+  FillParam();
+  return InitConstTensor();
+}
+
+void BatchnormCPUKernel::FreeMeanAndVariance() {
+  if (mean_ != nullptr) {
+    free(mean_);
+    mean_ = nullptr;
   }
-  if (var_addr_ != nullptr) {
-    free(var_addr_);
-    var_addr_ = nullptr;
+  if (variance_ != nullptr) {
+    free(variance_);
+    variance_ = nullptr;
   }
+}
+
+void BatchnormCPUKernel::FillParam() {
   auto input_shapes = in_tensors_[0]->shape();
   auto n_dim = input_shapes.size();
-  batchnorm_param_->channel_ = input_shapes[n_dim - 1];
-  batchnorm_param_->unit_ = 1;
+  auto param = reinterpret_cast<BatchNormParameter *>(op_parameter_);
+  param->channel_ = input_shapes[n_dim - 1];
+  param->unit_ = 1;
   for (size_t i = 0; i < n_dim - 1; i++) {
-    batchnorm_param_->unit_ *= input_shapes[i];
+    param->unit_ *= input_shapes[i];
   }
-  batchnorm_param_->op_parameter_.thread_num_ =
-    MSMIN(batchnorm_param_->op_parameter_.thread_num_, batchnorm_param_->channel_);
+}
 
-  auto ret = InitConstTensor();
-  if (ret != 0) {
-    MS_LOG(ERROR) << "Batchnorm fp32 InitConstTensor failed.";
+int BatchnormCPUKernel::InitConstTensor() {
+  mean_ = malloc(in_tensors_[1]->Size());
+  variance_ = malloc(in_tensors_[2]->Size());
+  if (mean_ == nullptr || variance_ == nullptr) {
+    MS_LOG(ERROR) << "Memory allocation failed";
+    FreeMeanAndVariance();
     return RET_ERROR;
   }
-  return RET_OK;
-}
-
-int BatchnormCPUKernel::DoExecute(int task_id) {
-  BatchNorm(out_addr_, in_addr_, mean_addr_, var_addr_, task_id, batchnorm_param_);
-  return RET_OK;
-}
-
-int BatchNormRun(int task_id, LiteParallelGroupEnv *penv, void *cdata) {
-  auto g_kernel = reinterpret_cast<BatchnormCPUKernel *>(cdata);
-  auto ret = g_kernel->DoExecute(task_id);
-  if (ret != RET_OK) {
-    MS_LOG(ERROR) << "BatchnormRun error task_id[" << task_id << "] error_code[" << ret << "]";
-    return ret;
-  }
+  memcpy(mean_, in_tensors_[1]->Data(), in_tensors_[1]->Size());
+  memcpy(variance_, in_tensors_[2]->Data(), in_tensors_[2]->Size());
   return RET_OK;
 }
 
 int BatchnormCPUKernel::Run() {
-  auto prepare_ret = Prepare();
-  if (prepare_ret != RET_OK) {
-    MS_LOG(ERROR) << "Prepare fail! Ret error code: " << prepare_ret;
-    return prepare_ret;
-  }
-  in_addr_ = reinterpret_cast<float *>(in_tensors_.at(0)->Data());
-  out_addr_ = reinterpret_cast<float *>(out_tensors_.at(0)->Data());
-
-  int ret = LiteBackendParallelLaunch(BatchNormRun, this, batchnorm_param_->op_parameter_.thread_num_);
+  auto ret = Prepare();
   if (ret != RET_OK) {
-    MS_LOG(ERROR) << "BatchnormRun error error_code[" << ret << "]";
+    MS_LOG(ERROR) << "Prepare fail! Ret error code: " << ret;
     return ret;
   }
-  return RET_OK;
+  ret = LiteBackendParallelLaunch(BatchNormRun, this, op_parameter_->thread_num_);
+  if (ret != RET_OK) {
+    MS_LOG(ERROR) << "BatchnormRun error error_code[" << ret << "]";
+  }
+  return ret;
+}
+
+int BatchnormCPUKernel::DoExecute(int task_id) {
+  auto param = reinterpret_cast<BatchNormParameter *>(op_parameter_);
+  BatchNormFp32(in_tensors_.at(0)->Data(), mean_, variance_, param, task_id, out_tensors_.at(0)->Data());
+  return mindspore::lite::RET_OK;
+}
+
+int BatchNormRun(int task_id, LiteParallelGroupEnv *penv, void *cdata) {
+  auto kernel = reinterpret_cast<BatchnormCPUKernel *>(cdata);
+  auto ret = kernel->DoExecute(task_id);
+  if (ret != RET_OK) {
+    MS_LOG(ERROR) << "BatchnormRun error task_id[" << task_id << "] error_code[" << ret << "]";
+  }
+  return ret;
 }
 
 kernel::LiteKernel *CpuBatchnormKernelCreator(const std::vector<lite::tensor::Tensor *> &inputs,
@@ -131,7 +103,6 @@ kernel::LiteKernel *CpuBatchnormKernelCreator(const std::vector<lite::tensor::Tensor *> &inputs,
                      const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx,
                      const mindspore::lite::PrimitiveC *primitive)
-      : LiteKernel(parameter, inputs, outputs, ctx, primitive) {
-    batchnorm_param_ = reinterpret_cast<BatchNormParameter *>(parameter);
-  }
-  ~BatchnormCPUKernel() override;
+      : LiteKernel(parameter, inputs, outputs, ctx, primitive) {}
+  virtual ~BatchnormCPUKernel() { FreeMeanAndVariance(); }
 
   int Init() override;
   int ReSize() override;
   int Run() override;
-  int InitConstTensor();
-  int DoExecute(int tid);
+  virtual int InitConstTensor();
+  virtual int DoExecute(int task_id);
 
- private:
-  float *in_addr_ = nullptr;
-  float *mean_addr_ = nullptr;
-  float *var_addr_ = nullptr;
-  float *out_addr_ = nullptr;
-  BatchNormParameter *batchnorm_param_;
+ protected:
+  void FillParam();
+  void FreeMeanAndVariance();
+  void *mean_ = nullptr;
+  void *variance_ = nullptr;
 };
+
+int BatchNormRun(int task_id, LiteParallelGroupEnv *penv, void *cdata);
 }  // namespace mindspore::kernel
 
 #endif  // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_BATCHNORM_H_
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/fused_batchnorm.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/fused_batchnorm.cc
index e082a8818a4..7f2a66f9ae7 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/fused_batchnorm.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/fused_batchnorm.cc
@@ -15,133 +15,59 @@
  */
 
 #include "src/runtime/kernel/arm/fp32/fused_batchnorm.h"
-#include "schema/model_generated.h"
 #include "src/kernel_registry.h"
-#include "include/errorcode.h"
-#include "src/runtime/runtime_api.h"
-#include "nnacl/batchnorm_parameter.h"
-#include "nnacl/fp32/batchnorm.h"
 
-using mindspore::kernel::KERNEL_ARCH::kCPU;
 using mindspore::lite::KernelRegistrar;
-using mindspore::lite::RET_ERROR;
-using mindspore::lite::RET_OK;
 using mindspore::schema::PrimitiveType_FusedBatchNorm;
 
 namespace mindspore::kernel {
-FusedBatchnormCPUKernel::~FusedBatchnormCPUKernel() { FreeTmpBuffer(); }
+int FusedBatchnormCPUKernel::ReSize() {
+  FreeMeanAndVariance();
+  FreeScaleAndOffset();
+  FillParam();
+  return InitConstTensor();
+}
 
-void FusedBatchnormCPUKernel::FreeTmpBuffer() {
-  if (scale_addr_ != nullptr) {
-    free(scale_addr_);
-    scale_addr_ = nullptr;
+void FusedBatchnormCPUKernel::FreeScaleAndOffset() {
+  if (scale_ != nullptr) {
+    free(scale_);
+    scale_ = nullptr;
   }
-  if (offset_addr_ != nullptr) {
-    free(offset_addr_);
-    offset_addr_ = nullptr;
-  }
-  if (mean_addr_ != nullptr) {
-    free(mean_addr_);
-    mean_addr_ = nullptr;
-  }
-  if (var_addr_ != nullptr) {
-    free(var_addr_);
-    var_addr_ = nullptr;
+  if (offset_ != nullptr) {
+    free(offset_);
+    offset_ = nullptr;
   }
 }
 
 int FusedBatchnormCPUKernel::InitConstTensor() {
   auto scale = in_tensors_[1];
-  scale_addr_ = reinterpret_cast<float *>(malloc(scale->ElementsNum() * sizeof(float)));
-  if (scale_addr_ == nullptr) {
-    MS_LOG(ERROR) << "Malloc buffer failed.";
-    return RET_ERROR;
-  }
-  memcpy(scale_addr_, scale->Data(), scale->ElementsNum() * sizeof(float));
-
   auto offset = in_tensors_[2];
-  offset_addr_ = reinterpret_cast<float *>(malloc(offset->ElementsNum() * sizeof(float)));
-  if (offset_addr_ == nullptr) {
-    MS_LOG(ERROR) << "Malloc buffer failed.";
-    return RET_ERROR;
-  }
-  memcpy(offset_addr_, offset->Data(), offset->ElementsNum() * sizeof(float));
-
   auto mean = in_tensors_[3];
-  mean_addr_ = reinterpret_cast<float *>(malloc(mean->ElementsNum() * sizeof(float)));
-  if (mean_addr_ == nullptr) {
-    MS_LOG(ERROR) << "Malloc buffer failed.";
-    return RET_ERROR;
-  }
-  memcpy(mean_addr_, mean->Data(), mean->ElementsNum() * sizeof(float));
-
   auto variance = in_tensors_[4];
-  var_addr_ = reinterpret_cast<float *>(malloc(variance->ElementsNum() * sizeof(float)));
-  if (var_addr_ == nullptr) {
-    MS_LOG(ERROR) << "Malloc buffer failed.";
+
+  scale_ = malloc(scale->Size());
+  offset_ = malloc(offset->Size());
+  mean_ = malloc(mean->Size());
+  variance_ = malloc(variance->Size());
+
+  if (scale_ == nullptr || offset_ == nullptr || mean_ == nullptr || variance_ == nullptr) {
+    FreeMeanAndVariance();
+    FreeScaleAndOffset();
+    MS_LOG(ERROR) << "Memory allocation failed";
     return RET_ERROR;
   }
-  memcpy(var_addr_, variance->Data(), variance->ElementsNum() * sizeof(float));
+  memcpy(scale_, scale->Data(), scale->Size());
+  memcpy(offset_, offset->Data(), offset->Size());
+  memcpy(mean_, mean->Data(), mean->Size());
+  memcpy(variance_, variance->Data(), variance->Size());
   return RET_OK;
 }
 
-int FusedBatchnormCPUKernel::Init() {
-  if (!InferShapeDone()) {
-    return RET_OK;
-  }
-  return ReSize();
-}
-
-int FusedBatchnormCPUKernel::ReSize() {
-  FreeTmpBuffer();
-  auto input_shapes = in_tensors_[0]->shape();
-  auto n_dim = input_shapes.size();
-  batchnorm_param_->channel_ = input_shapes[n_dim - 1];
-  batchnorm_param_->unit_ = 1;
-  for (size_t i = 0; i < n_dim - 1; i++) {
-    batchnorm_param_->unit_ *= input_shapes[i];
-  }
-  batchnorm_param_->op_parameter_.thread_num_ =
-    MSMIN(batchnorm_param_->op_parameter_.thread_num_, batchnorm_param_->channel_);
-
-  auto ret = InitConstTensor();
-  if (ret != 0) {
-    MS_LOG(ERROR) << "FusedBatchnorm fp32 InitConstTensor failed.";
-    return RET_ERROR;
-  }
-  return RET_OK;
-}
-
-int FusedBatchnormCPUKernel::Execute(int task_id) {
-  FusedBatchNorm(out_addr_, in_addr_, scale_addr_, offset_addr_, mean_addr_, var_addr_, task_id, batchnorm_param_);
-  return RET_OK;
-}
-
-int FusedBatchNormRun(int task_id, LiteParallelGroupEnv *penv, void *cdata) {
-  auto g_kernel = reinterpret_cast<FusedBatchnormCPUKernel *>(cdata);
-  auto ret = g_kernel->Execute(task_id);
-  if (ret != RET_OK) {
-    MS_LOG(ERROR) << "FusedBatchnormRun error task_id[" << task_id << "] error_code[" << ret << "]";
-    return ret;
-  }
-  return RET_OK;
-}
-
-int FusedBatchnormCPUKernel::Run() {
-  auto prepare_ret = Prepare();
-  if (prepare_ret != RET_OK) {
-    MS_LOG(ERROR) << "Prepare fail! Ret error code: " << prepare_ret;
-    return prepare_ret;
-  }
-  in_addr_ = reinterpret_cast<float *>(in_tensors_.at(0)->Data());
-  out_addr_ = reinterpret_cast<float *>(out_tensors_.at(0)->Data());
-
-  int ret = LiteBackendParallelLaunch(FusedBatchNormRun, this, batchnorm_param_->op_parameter_.thread_num_);
-  if (ret != RET_OK) {
-    MS_LOG(ERROR) << "FusedBatchnormRun error error_code[" << ret << "]";
-    return ret;
-  }
-  return RET_OK;
+int FusedBatchnormCPUKernel::DoExecute(int task_id) {
+  auto param = reinterpret_cast<BatchNormParameter *>(op_parameter_);
+  FusedBatchNormFp32(in_tensors_.at(0)->Data(), scale_, offset_, mean_, variance_, param, task_id,
+                     out_tensors_.at(0)->Data());
+  return mindspore::lite::RET_OK;
 }
 
 kernel::LiteKernel *CpuFusedBatchnormKernelCreator(const std::vector<lite::tensor::Tensor *> &inputs,
@@ -149,11 +75,6 @@ kernel::LiteKernel *CpuFusedBatchnormKernelCreator(const std::vector<lite::tensor::Tensor *>
 
 #include <vector>
-#include "src/lite_kernel.h"
-#include "nnacl/batchnorm_parameter.h"
+#include "src/runtime/kernel/arm/fp32/batchnorm.h"
 
 namespace mindspore::kernel {
-class FusedBatchnormCPUKernel : public LiteKernel {
+class FusedBatchnormCPUKernel : public BatchnormCPUKernel {
  public:
   FusedBatchnormCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs,
                           const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx,
                           const mindspore::lite::PrimitiveC *primitive)
-      : LiteKernel(parameter, inputs, outputs, ctx, primitive) {
-    batchnorm_param_ = reinterpret_cast<BatchNormParameter *>(parameter);
-  }
-  ~FusedBatchnormCPUKernel() override;
+      : BatchnormCPUKernel(parameter, inputs, outputs, ctx, primitive) {}
+  ~FusedBatchnormCPUKernel() { FreeScaleAndOffset(); }
 
-  int Init() override;
   int ReSize() override;
-  int Run() override;
-  int InitConstTensor();
-  int Execute(int task_id);
+  int InitConstTensor() override;
+  int DoExecute(int task_id) override;
 
- private:
-  void FreeTmpBuffer();
-  float *in_addr_ = nullptr;
-  float *mean_addr_ = nullptr;
-  float *var_addr_ = nullptr;
-  float *scale_addr_ = nullptr;
-  float *offset_addr_ = nullptr;
-  float *out_addr_ = nullptr;
-
-  BatchNormParameter *batchnorm_param_;
+ protected:
+  void FreeScaleAndOffset();
+  void *scale_ = nullptr;
+  void *offset_ = nullptr;
 };
 }  // namespace mindspore::kernel
diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/batchnorm_fp32_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/batchnorm_fp32_tests.cc
index 4ac9b4473df..a2114587597 100644
--- a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/batchnorm_fp32_tests.cc
+++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/batchnorm_fp32_tests.cc
@@ -31,40 +31,32 @@ TEST_F(TestBatchnormFp32, BNTest) {
                                  -1.1983503, -6.6790967, 6.383416,  -13.3213005, -8.693595,  9.476344};
   std::vector<float> in_data1 = {12.352293, 5.122387, 14.249514};
   std::vector<float> in_data2 = {14.632595, 0.70900035, 11.179003};
-  std::vector<lite::tensor::Tensor *> inputs_tensor;
-  std::vector<lite::tensor::Tensor *> outputs_tensor;
 
   BatchNormParameter op_param;
   op_param.op_parameter_.type_ = schema::PrimitiveType_BatchNorm;
   op_param.epsilon_ = 0.001f;
-  std::vector<int> shape = {1, 2, 2, 3};
 
-  lite::tensor::Tensor input0_tensor;
-  lite::tensor::Tensor input1_tensor;
-  lite::tensor::Tensor input2_tensor;
-  inputs_tensor.push_back(&input0_tensor);
-  inputs_tensor.push_back(&input1_tensor);
-  inputs_tensor.push_back(&input2_tensor);
+  lite::tensor::Tensor input0_tensor(kNumberTypeFloat32, {1, 2, 2, 3});
+  lite::tensor::Tensor input1_tensor(kNumberTypeFloat32, {3});
+  lite::tensor::Tensor input2_tensor(kNumberTypeFloat32, {3});
   input0_tensor.SetData(in_data.data());
   input1_tensor.SetData(in_data1.data());
   input2_tensor.SetData(in_data2.data());
-  input0_tensor.set_shape(shape);
-  input1_tensor.set_shape({3});
-  input2_tensor.set_shape({3});
+  std::vector<lite::tensor::Tensor *> inputs_tensor = {&input0_tensor, &input1_tensor, &input2_tensor};
 
   std::vector<float> output(12);
   std::vector<float> corr_out = {-6.1533737, 7.4904885,  -0.8563998, -0.289212,  -9.356432,  0.13245535,
                                  -3.5422924, -14.005781, -2.3525476, -6.7113695, -16.396551, -1.4275324};
 
-  lite::tensor::Tensor output0_tensor;
-  outputs_tensor.push_back(&output0_tensor);
+  lite::tensor::Tensor output0_tensor(kNumberTypeFloat32, {1, 2, 2, 3});
   output0_tensor.SetData(output.data());
-  output0_tensor.set_shape(shape);
+  std::vector<lite::tensor::Tensor *> outputs_tensor = {&output0_tensor};
+
   kernel::KernelKey desc = {kernel::KERNEL_ARCH::kCPU, kNumberTypeFloat32, schema::PrimitiveType_BatchNorm};
   auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc);
   ASSERT_NE(creator, nullptr);
 
   lite::Context ctx;
-  ctx.thread_num_ = 1;
+  ctx.thread_num_ = 2;
   kernel::LiteKernel *kernel =
       creator(inputs_tensor, outputs_tensor, reinterpret_cast<OpParameter *>(&op_param), &ctx, desc, nullptr);
   ASSERT_NE(kernel, nullptr);
@@ -82,7 +74,6 @@ TEST_F(TestBatchnormFp32, BNTest) {
   input1_tensor.SetData(nullptr);
   input2_tensor.SetData(nullptr);
   output0_tensor.SetData(nullptr);
-  MS_LOG(INFO) << "TestBathNormFp32 accuracy passed";
 }
 
 TEST_F(TestBatchnormFp32, FusedBNTest) {
   std::vector<float> scale = {13.323708, 14.0656395, 12.888223};
   std::vector<float> offset = {27.888096, 24.533648, 15.335093};
   std::vector<float> mean = {11.5127125, 0.47681615, 5.851508};
   std::vector<float> var = {1.270583, 13.005714, 6.089223};
-  std::vector<lite::tensor::Tensor *> inputs_tensor;
-  std::vector<lite::tensor::Tensor *> outputs_tensor;
 
   BatchNormParameter op_param;
   op_param.op_parameter_.type_ = schema::PrimitiveType_BatchNorm;
   op_param.epsilon_ = 0.001f;
-  std::vector<int> shape = {1, 2, 2, 3};
 
-  lite::tensor::Tensor input[5];
-  input[0].SetData(in_data.data());
-  input[1].SetData(scale.data());
-  input[2].SetData(offset.data());
-  input[3].SetData(mean.data());
-  input[4].SetData(var.data());
-
-  input[0].set_shape(shape);
-  for (int i = 1; i < 5; i++) {
-    input[i].set_shape({3});
-  }
-  for (int i = 0; i < 5; i++) {
-    inputs_tensor.push_back(&input[i]);
-  }
+  lite::tensor::Tensor input0(kNumberTypeFloat32, {1, 2, 2, 3});
+  lite::tensor::Tensor input1(kNumberTypeFloat32, {3});
+  lite::tensor::Tensor input2(kNumberTypeFloat32, {3});
+  lite::tensor::Tensor input3(kNumberTypeFloat32, {3});
+  lite::tensor::Tensor input4(kNumberTypeFloat32, {3});
+  input0.SetData(in_data.data());
+  input1.SetData(scale.data());
+  input2.SetData(offset.data());
+  input3.SetData(mean.data());
+  input4.SetData(var.data());
+  std::vector<lite::tensor::Tensor *> inputs_tensor = {&input0, &input1, &input2, &input3, &input4};
 
   std::vector<float> output(12);
   std::vector<float> corr_out = {-195.5765,  67.03745, -4.243883,  -42.028015, 74.37044, 9.075897,
                                  5.1857452,  56.60399, -77.215096, -181.18402, 49.81066, -59.204563};
 
-  lite::tensor::Tensor output0_tensor;
-  outputs_tensor.push_back(&output0_tensor);
-  output0_tensor.SetData(output.data());
-  output0_tensor.set_shape(shape);
+  lite::tensor::Tensor output0(kNumberTypeFloat32, {1, 2, 2, 3});
+  output0.SetData(output.data());
+  std::vector<lite::tensor::Tensor *> outputs_tensor = {&output0};
+
   kernel::KernelKey desc = {kernel::KERNEL_ARCH::kCPU, kNumberTypeFloat32, schema::PrimitiveType_FusedBatchNorm};
   auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc);
   ASSERT_NE(creator, nullptr);
 
   lite::Context ctx;
-  ctx.thread_num_ = 1;
+  ctx.thread_num_ = 2;
   kernel::LiteKernel *kernel =
       creator(inputs_tensor, outputs_tensor, reinterpret_cast<OpParameter *>(&op_param), &ctx, desc, nullptr);
   ASSERT_NE(kernel, nullptr);
-  auto output_tensor_shape = output0_tensor.shape();
   kernel->Run();
 
   printf("==================output data=================\n");
-  for (int i = 0; i < output0_tensor.ElementsNum(); i++) {
+  for (int i = 0; i < output0.ElementsNum(); i++) {
     std::cout << output[i] << " ,";
   }
   std::cout << std::endl;
-  CompareOutputData(output.data(), corr_out.data(), output0_tensor.ElementsNum(), 0.001);
+  CompareOutputData(output.data(), corr_out.data(), output0.ElementsNum(), 0.001);
 
-  for (int i = 1; i < 5; i++) {
-    input[i].SetData(nullptr);
-  }
-  output0_tensor.SetData(nullptr);
-  MS_LOG(INFO) << "TestFusedBathNormFp32 accuracy passed";
+  input0.SetData(nullptr);
+  input1.SetData(nullptr);
+  input2.SetData(nullptr);
+  input3.SetData(nullptr);
+  input4.SetData(nullptr);
+  output0.SetData(nullptr);
 }
 
 TEST_F(TestBatchnormFp32, easyTest) {
   std::vector<float> in_data = {1, 4, 2, 5, 3, 6, -1, -4, -2, -5, -3, -6};
   std::vector<float> in_data1 = {0.1, 0.6};
   std::vector<float> in_data2 = {3, 4};
-  std::vector<lite::tensor::Tensor *> inputs_tensor;
-  std::vector<lite::tensor::Tensor *> outputs_tensor;
 
   BatchNormParameter op_param;
   op_param.op_parameter_.type_ = schema::PrimitiveType_BatchNorm;
   op_param.epsilon_ = 0.001f;
-  std::vector<int> shape = {1, 1, 6, 2};
 
-  lite::tensor::Tensor input0_tensor;
-  lite::tensor::Tensor input1_tensor;
-  lite::tensor::Tensor input2_tensor;
-  inputs_tensor.push_back(&input0_tensor);
-  inputs_tensor.push_back(&input1_tensor);
-  inputs_tensor.push_back(&input2_tensor);
-  input0_tensor.SetData(in_data.data());
-  input1_tensor.SetData(in_data1.data());
-  input2_tensor.SetData(in_data2.data());
-  input0_tensor.set_shape(shape);
-  input1_tensor.set_shape({2});
-  input2_tensor.set_shape({2});
+  lite::tensor::Tensor input0(kNumberTypeFloat32, {1, 1, 6, 2});
+  lite::tensor::Tensor input1(kNumberTypeFloat32, {2});
+  lite::tensor::Tensor input2(kNumberTypeFloat32, {2});
+  input0.SetData(in_data.data());
+  input1.SetData(in_data1.data());
+  input2.SetData(in_data2.data());
+  std::vector<lite::tensor::Tensor *> inputs_tensor = {&input0, &input1, &input2};
 
   std::vector<float> output(12);
   std::vector<float> corr_out = {0.519529, 1.69979, 1.09678, 2.19973, 1.67404, 2.69966,
                                  -0.63498, -2.29971, -1.21223, -2.79965, -1.78949, -3.29959};
 
-  lite::tensor::Tensor output0_tensor;
-  outputs_tensor.push_back(&output0_tensor);
-  output0_tensor.SetData(output.data());
-  output0_tensor.set_shape(shape);
+  lite::tensor::Tensor output0(kNumberTypeFloat32, {1, 1, 6, 2});
+  output0.SetData(output.data());
+  std::vector<lite::tensor::Tensor *> outputs_tensor = {&output0};
+
   kernel::KernelKey desc = {kernel::KERNEL_ARCH::kCPU, kNumberTypeFloat32, schema::PrimitiveType_BatchNorm};
   auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc);
   ASSERT_NE(creator, nullptr);
 
   lite::Context ctx;
-  ctx.thread_num_ = 1;
+  ctx.thread_num_ = 2;
   kernel::LiteKernel *kernel =
       creator(inputs_tensor, outputs_tensor, reinterpret_cast<OpParameter *>(&op_param), &ctx, desc, nullptr);
   ASSERT_NE(kernel, nullptr);
-  auto output_tensor_shape = output0_tensor.shape();
   kernel->Run();
 
   printf("==================output data=================\n");
-  for (int i = 0; i < output0_tensor.ElementsNum(); i++) {
+  for (int i = 0; i < output0.ElementsNum(); i++) {
     std::cout << output[i] << " ,";
   }
   std::cout << std::endl;
-  CompareOutputData(output.data(), corr_out.data(), output0_tensor.ElementsNum(), 0.001);
+  CompareOutputData(output.data(), corr_out.data(), output0.ElementsNum(), 0.001);
 
-  input0_tensor.SetData(nullptr);
-  input1_tensor.SetData(nullptr);
-  input2_tensor.SetData(nullptr);
-  output0_tensor.SetData(nullptr);
-  MS_LOG(INFO) << "TestBathNormFp32 accuracy passed";
+  input0.SetData(nullptr);
+  input1.SetData(nullptr);
+  input2.SetData(nullptr);
+  output0.SetData(nullptr);
 }
 }  // namespace mindspore
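The easyTest expectations can be reproduced by hand from the kernel's formula: for channel 0, out[0] = (in[0] - mean[0]) / sqrt(var[0] + epsilon). A quick standalone check of the first element:

  #include <math.h>
  #include <stdio.h>

  int main(void) {
    /* easyTest, channel 0: in = 1, mean = 0.1, var = 3, eps = 0.001 */
    printf("%f\n", (1.0f - 0.1f) / sqrtf(3.0f + 0.001f)); /* 0.519529 */
    return 0;
  }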