forked from mindspore-Ecosystem/mindspore
!8041 rewrite the instanceNorm operator
Merge pull request !8041 from XianglongZeng/myms
This commit is contained in:
commit
58adf298ea
|
@ -13,30 +13,37 @@
|
|||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "nnacl/fp32/instance_norm.h"
|
||||
#include <math.h>
|
||||
#include "nnacl/instance_norm_parameter.h"
|
||||
#include "nnacl/errorcode.h"
|
||||
#include "nnacl/op_base.h"
|
||||
|
||||
void InstanceNormFp32(const void *input, const void *mean, const void *variance, InstanceNormParameter *param,
|
||||
int task_id, void *output) {
|
||||
int units_per_thread = UP_DIV(param->unit_, param->op_parameter_.thread_num_);
|
||||
int completed_units = task_id * units_per_thread;
|
||||
if (completed_units >= param->unit_) {
|
||||
return;
|
||||
int InstanceNorm(const int outer_size, const int inner_size, const float *src_data, const float *scale_data,
|
||||
const float *bias_data, InstanceNormParameter *param, float *dst_data, const int task_id,
|
||||
const int thread_num) {
|
||||
if (src_data == NULL || dst_data == NULL || scale_data == NULL || bias_data == NULL) {
|
||||
return NNACL_NULL_PTR;
|
||||
}
|
||||
int cur_unit = MSMIN(units_per_thread, param->unit_ - completed_units);
|
||||
int cur_offset = completed_units * param->channel_;
|
||||
for (int n = 0; n < param->batch_; n++) {
|
||||
for (int hw = 0; hw < cur_unit; hw++) {
|
||||
for (int c = 0; c < param->channel_; c++) {
|
||||
float variance_sqrt = sqrt(((const float *)variance)[n * param->channel_ + c] + param->epsilon_);
|
||||
((float *)output)[cur_offset + c] =
|
||||
(((const float *)input)[cur_offset + c] - ((const float *)mean)[n * param->channel_ + c]) / variance_sqrt;
|
||||
}
|
||||
cur_offset += param->channel_;
|
||||
int i, j;
|
||||
for (j = task_id; j < outer_size; j += thread_num) {
|
||||
int offset = (j / param->channel_) * inner_size * param->channel_;
|
||||
const float *src = src_data + offset;
|
||||
float *dst = dst_data + offset;
|
||||
float mean = 0.0f;
|
||||
float square_mean = 0.0f;
|
||||
for (i = 0; i < inner_size; i++) {
|
||||
int idx = j % param->channel_ + i * param->channel_;
|
||||
mean += src[idx];
|
||||
square_mean += src[idx] * src[idx];
|
||||
}
|
||||
mean /= (float)inner_size;
|
||||
square_mean /= (float)inner_size;
|
||||
float deno = 1 / sqrtf(square_mean - mean * mean + param->epsilon_);
|
||||
for (i = 0; i < inner_size; ++i) {
|
||||
int idx = j % param->channel_ + i * param->channel_;
|
||||
int scale_idx = (j / param->channel_) * param->channel_ + j % param->channel_;
|
||||
dst[idx] = ((src[idx] - mean) * deno) * scale_data[scale_idx] + bias_data[scale_idx];
|
||||
}
|
||||
cur_offset += (param->unit_ - cur_unit) * param->channel_;
|
||||
}
|
||||
return NNACL_OK;
|
||||
}
|
||||
|
|
|
@ -13,20 +13,19 @@
|
|||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef MINDSPORE_LITE_NNACL_FP32_INSTANCE_NORM_H_
|
||||
#define MINDSPORE_LITE_NNACL_FP32_INSTANCE_NORM_H_
|
||||
|
||||
#include "nnacl/op_base.h"
|
||||
#include "nnacl/instance_norm_parameter.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
void InstanceNormFp32(const void *input, const void *mean, const void *variance, InstanceNormParameter *param,
|
||||
int task_id, void *output);
|
||||
void FusedInstanceNormFp32(const void *input, const void *scale, const void *offset, const void *mean,
|
||||
const void *variance, InstanceNormParameter *param, int task_id, void *output);
|
||||
// Instance normalization with scale and bias over float data.
// For every j in [task_id, outer_size) stepping by thread_num, the definition accumulates the mean and the
// mean of squares over inner_size source elements spaced param->channel_ apart, then writes
// (x - mean) / sqrtf(square_mean - mean * mean + param->epsilon_) * scale_data[j] + bias_data[j] to dst_data.
// Returns NNACL_NULL_PTR when src_data, dst_data, scale_data or bias_data is NULL, otherwise NNACL_OK.
int InstanceNorm(const int outer_size, const int inner_size, const float *src_data, const float *scale_data,
|
||||
const float *bias_data, InstanceNormParameter *param, float *dst_data, const int task_id,
|
||||
const int thread_num);
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
|
|
@ -23,10 +23,7 @@ typedef struct InstanceNormParameter {
|
|||
OpParameter op_parameter_;
|
||||
float epsilon_;
|
||||
float momentum_;
|
||||
int unit_;
|
||||
int batch_;
|
||||
int channel_;
|
||||
bool fused_;
|
||||
} InstanceNormParameter;
|
||||
|
||||
#endif // MINDSPORE_LITE_NNACL_INSTANCE_NORM_PARAMETER_H_
|
||||
|
|
|
@ -33,7 +33,6 @@ OpParameter *PopulateInstanceNormParameter(const mindspore::lite::PrimitiveC *pr
|
|||
memset(instance_norm_param, 0, sizeof(InstanceNormParameter));
|
||||
instance_norm_param->op_parameter_.type_ = primitive->Type();
|
||||
instance_norm_param->epsilon_ = param->GetEpsilon();
|
||||
instance_norm_param->fused_ = false;
|
||||
return reinterpret_cast<OpParameter *>(instance_norm_param);
|
||||
}
|
||||
|
||||
|
|
|
@ -13,11 +13,13 @@
|
|||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "src/runtime/kernel/arm/fp32/instance_norm.h"
|
||||
#include "nnacl/fp32/instance_norm.h"
|
||||
#include <vector>
|
||||
#include "schema/model_generated.h"
|
||||
#include "src/kernel_registry.h"
|
||||
#include "include/errorcode.h"
|
||||
|
||||
using mindspore::kernel::KERNEL_ARCH::kCPU;
|
||||
using mindspore::lite::KernelRegistrar;
|
||||
using mindspore::lite::RET_ERROR;
|
||||
using mindspore::lite::RET_OK;
|
||||
|
@ -32,47 +34,60 @@ int InstanceNormCPUKernel::Init() {
|
|||
}
|
||||
|
||||
int InstanceNormCPUKernel::ReSize() {
|
||||
auto input_shapes = in_tensors_[0]->shape();
|
||||
auto input_shapes = in_tensors_.front()->shape();
|
||||
auto n_dim = input_shapes.size();
|
||||
auto param = reinterpret_cast<InstanceNormParameter *>(op_parameter_);
|
||||
param->batch_ = input_shapes[0];
|
||||
param->channel_ = input_shapes[n_dim - 1];
|
||||
param->unit_ = 1;
|
||||
for (size_t i = 1; i < n_dim - 1; i++) {
|
||||
param->unit_ *= input_shapes[i];
|
||||
outer_size_ = input_shapes[0] * input_shapes[n_dim - 1];
|
||||
inner_size_ = 1;
|
||||
for (size_t i = 0; i < n_dim - 1; ++i) {
|
||||
inner_size_ *= input_shapes[i];
|
||||
}
|
||||
param_->channel_ = input_shapes[n_dim - 1];
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
int InstanceNormCPUKernel::DoInstanceNorm(int task_id) {
|
||||
int ret = InstanceNorm(outer_size_, inner_size_, src_data_, scale_data_, bias_data_, param_, dst_data_, task_id,
|
||||
op_parameter_->thread_num_);
|
||||
if (ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "DoInstanceNorm error error_code[" << ret << "]";
|
||||
return ret;
|
||||
}
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
int InstanceNormRun(void *cdata, int task_id) {
|
||||
auto InstanceNormData = reinterpret_cast<InstanceNormCPUKernel *>(cdata);
|
||||
auto ret = InstanceNormData->DoInstanceNorm(task_id);
|
||||
if (ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "InstanceNormRun error task_id[" << task_id << "] error_code[" << ret << "]";
|
||||
return RET_ERROR;
|
||||
}
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
int InstanceNormCPUKernel::Run() {
|
||||
src_data_ = reinterpret_cast<float *>(in_tensors_.at(0)->MutableData());
|
||||
scale_data_ = reinterpret_cast<float *>(in_tensors_.at(1)->MutableData());
|
||||
bias_data_ = reinterpret_cast<float *>(in_tensors_.at(2)->MutableData());
|
||||
dst_data_ = reinterpret_cast<float *>(out_tensors_.at(0)->MutableData());
|
||||
auto ret = ParallelLaunch(this->context_->thread_pool_, InstanceNormRun, this, op_parameter_->thread_num_);
|
||||
if (ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "InstanceNormRun error error_code[" << ret << "]";
|
||||
MS_LOG(ERROR) << "FillRun error error_code[" << ret << "]";
|
||||
return ret;
|
||||
}
|
||||
return ret;
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
int InstanceNormCPUKernel::DoExecute(int task_id) {
|
||||
auto param = reinterpret_cast<InstanceNormParameter *>(op_parameter_);
|
||||
InstanceNormFp32(in_tensors_.at(0)->MutableData(), in_tensors_.at(1)->MutableData(), in_tensors_.at(2)->MutableData(),
|
||||
param, task_id, out_tensors_.at(0)->MutableData());
|
||||
return mindspore::lite::RET_OK;
|
||||
}
|
||||
|
||||
int InstanceNormRun(void *cdata, int task_id) {
|
||||
auto kernel = reinterpret_cast<InstanceNormCPUKernel *>(cdata);
|
||||
auto ret = kernel->DoExecute(task_id);
|
||||
if (ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "InstanceNormRun error task_id[" << task_id << "] error_code[" << ret << "]";
|
||||
kernel::LiteKernel *CpuInstanceNormFp32KernelCreator(const std::vector<lite::Tensor *> &inputs,
|
||||
const std::vector<lite::Tensor *> &outputs,
|
||||
OpParameter *opParameter, const lite::InnerContext *ctx,
|
||||
const kernel::KernelKey &desc,
|
||||
const mindspore::lite::PrimitiveC *primitive) {
|
||||
if (opParameter == nullptr) {
|
||||
MS_LOG(ERROR) << "Create kernel failed, opParameter is nullptr, type: PrimitiveType_InstanceNorm. ";
|
||||
return nullptr;
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
kernel::LiteKernel *CpuInstanceNormKernelCreator(const std::vector<lite::Tensor *> &inputs,
|
||||
const std::vector<lite::Tensor *> &outputs, OpParameter *opParameter,
|
||||
const lite::InnerContext *ctx, const kernel::KernelKey &desc,
|
||||
const mindspore::lite::PrimitiveC *primitive) {
|
||||
MS_ASSERT(opParameter != nullptr);
|
||||
MS_ASSERT(desc.type == schema::PrimitiveType_InstanceNorm);
|
||||
auto *kernel = new (std::nothrow) InstanceNormCPUKernel(opParameter, inputs, outputs, ctx, primitive);
|
||||
if (kernel == nullptr) {
|
||||
MS_LOG(ERROR) << "new InstanceNormCPUKernel fail!";
|
||||
|
@ -89,5 +104,5 @@ kernel::LiteKernel *CpuInstanceNormKernelCreator(const std::vector<lite::Tensor
|
|||
return kernel;
|
||||
}
|
||||
|
||||
REG_KERNEL(kCPU, kNumberTypeFloat32, PrimitiveType_InstanceNorm, CpuInstanceNormKernelCreator)
|
||||
REG_KERNEL(kCPU, kNumberTypeFloat32, PrimitiveType_InstanceNorm, CpuInstanceNormFp32KernelCreator)
|
||||
} // namespace mindspore::kernel
|
||||
|
|
|
@ -13,15 +13,13 @@
|
|||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_INSTANCE_NORM_H_
|
||||
#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_INSTANCE_NORM_H_
|
||||
|
||||
#include <vector>
|
||||
#include "src/lite_kernel.h"
|
||||
#include "include/context.h"
|
||||
#include "nnacl/instance_norm_parameter.h"
|
||||
#include "src/runtime/runtime_api.h"
|
||||
#include "nnacl/fp32/instance_norm.h"
|
||||
|
||||
using mindspore::lite::InnerContext;
|
||||
|
||||
|
@ -29,18 +27,27 @@ namespace mindspore::kernel {
|
|||
class InstanceNormCPUKernel : public LiteKernel {
|
||||
public:
|
||||
InstanceNormCPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
|
||||
const std::vector<lite::Tensor *> &outputs, const InnerContext *ctx,
|
||||
const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx,
|
||||
const mindspore::lite::PrimitiveC *primitive)
|
||||
: LiteKernel(parameter, inputs, outputs, ctx, primitive) {}
|
||||
~InstanceNormCPUKernel() override = default;
|
||||
: LiteKernel(parameter, inputs, outputs, ctx, primitive) {
|
||||
param_ = reinterpret_cast<InstanceNormParameter *>(parameter);
|
||||
}
|
||||
~InstanceNormCPUKernel() override{};
|
||||
|
||||
int Init() override;
|
||||
int ReSize() override;
|
||||
int Run() override;
|
||||
virtual int DoExecute(int task_id);
|
||||
};
|
||||
int DoInstanceNorm(int thread_id);
|
||||
|
||||
int InstanceNormRun(void *cdata, int task_id);
|
||||
private:
|
||||
InstanceNormParameter *param_ = nullptr;
|
||||
int outer_size_;
|
||||
int inner_size_;
|
||||
float *src_data_ = nullptr;
|
||||
float *dst_data_ = nullptr;
|
||||
float *scale_data_ = nullptr;
|
||||
float *bias_data_ = nullptr;
|
||||
};
|
||||
} // namespace mindspore::kernel
|
||||
|
||||
#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_INSTANCE_NORM_H_
|
||||
|
|
|
@ -39,7 +39,7 @@ class LayerNormCPUKernel : public LiteKernel {
|
|||
int DoLayerNorm(int thread_id);
|
||||
|
||||
private:
|
||||
LayerNormParameter *param_;
|
||||
LayerNormParameter *param_ = nullptr;
|
||||
int outer_size_;
|
||||
int inner_size_;
|
||||
float *src_data_ = nullptr;
|
||||
|
|
|
@ -45,8 +45,8 @@ TEST_F(TestInstanceNormFp32, INTest1) {
|
|||
std::vector<lite::Tensor *> inputs_tensor = {&input0_tensor, &input1_tensor, &input2_tensor};
|
||||
|
||||
std::vector<float> output(12);
|
||||
std::vector<float> corr_out = {-6.1533737, 7.4904885, -0.8563998, -0.289212, -9.356432, 0.13245535,
|
||||
-3.5422924, -14.005781, -2.3525476, -6.7113695, -16.396551, -1.4275324};
|
||||
std::vector<float> corr_out = {5.0145645, 9.248516, 15.439679, 33.51017, 0.0012711287, 31.0666883,
|
||||
17.70254, -2.5507483, -8.204435, 2.3031063, -3.8630369, 6.4138837};
|
||||
|
||||
lite::Tensor output0_tensor(kNumberTypeFloat32, {1, 2, 2, 3});
|
||||
output0_tensor.set_data(output.data());
|
||||
|
@ -80,8 +80,8 @@ TEST_F(TestInstanceNormFp32, INTest1) {
|
|||
TEST_F(TestInstanceNormFp32, INTest2) {
|
||||
std::vector<float> in_data = {-11.18675, 11.433986, 11.386012, 11.245945, -2.7614849, 14.692399,
|
||||
-1.1983503, -6.6790967, 6.383416, -13.3213005, -8.693595, 9.476344,
|
||||
-11.18675, 11.433986, 11.386012, 11.245945, -2.7614849, 14.692399,
|
||||
-1.1983503, -6.6790967, 6.383416, -13.3213005, -8.693595, 9.476344};
|
||||
-12.18675, 12.433986, 12.386012, 12.245945, -3.7614849, 15.692399,
|
||||
-2.1983503, -7.6790967, 7.383416, -14.3213005, -9.693595, 10.476344};
|
||||
std::vector<float> in_data1 = {12.352293, 5.122387, 14.249514, 12.352293, 5.122387, 14.249514};
|
||||
std::vector<float> in_data2 = {14.632595, 0.70900035, 11.179003, 14.632595, 0.70900035, 11.179003};
|
||||
|
||||
|
@ -90,18 +90,18 @@ TEST_F(TestInstanceNormFp32, INTest2) {
|
|||
op_param.epsilon_ = 0.001f;
|
||||
|
||||
lite::Tensor input0_tensor(kNumberTypeFloat32, {2, 2, 2, 3});
|
||||
lite::Tensor input1_tensor(kNumberTypeFloat32, {6});
|
||||
lite::Tensor input2_tensor(kNumberTypeFloat32, {6});
|
||||
lite::Tensor input1_tensor(kNumberTypeFloat32, {2, 3});
|
||||
lite::Tensor input2_tensor(kNumberTypeFloat32, {2, 3});
|
||||
input0_tensor.set_data(in_data.data());
|
||||
input1_tensor.set_data(in_data1.data());
|
||||
input2_tensor.set_data(in_data2.data());
|
||||
std::vector<lite::Tensor *> inputs_tensor = {&input0_tensor, &input1_tensor, &input2_tensor};
|
||||
|
||||
std::vector<float> output(24);
|
||||
std::vector<float> corr_out = {-6.1533737, 7.4904885, -0.8563998, -0.289212, -9.356432, 0.13245535,
|
||||
-3.5422924, -14.005781, -2.3525476, -6.7113695, -16.396551, -1.4275324,
|
||||
-6.1533737, 7.4904885, -0.8563998, -0.289212, -9.356432, 0.13245535,
|
||||
-3.5422924, -14.005781, -2.3525476, -6.7113695, -16.396551, -1.4275324};
|
||||
std::vector<float> corr_out = {5.0145645, 9.248516, 15.439679, 33.51017, 0.0012711287, 31.0666883,
|
||||
17.70254, -2.5507483, -8.204435, 2.3031063, -3.8630369, 6.4138837,
|
||||
5.133601, 9.310399, 15.439679, 33.886883, -0.22505027, 31.066883,
|
||||
16.888313, -2.5316327, -8.204435, 2.6215858, -3.717714, 6.4138837};
|
||||
|
||||
lite::Tensor output0_tensor(kNumberTypeFloat32, {2, 2, 2, 3});
|
||||
output0_tensor.set_data(output.data());
|
||||
|
|
|
@ -21,7 +21,7 @@
|
|||
|
||||
namespace mindspore {
|
||||
namespace lite {
|
||||
constexpr int32_t kSingleGrounp = 1;
|
||||
constexpr int32_t kSingleGroup = 1;
|
||||
bool OnnxConvParser::ParseGroupConvolution(const std::unique_ptr<schema::Conv2DT> &attr, schema::CNodeT *op) {
|
||||
MS_LOG(DEBUG) << "onnx DepthwiseConvParser";
|
||||
if (attr == nullptr || attr->group != attr->channelIn) {
|
||||
|
@ -172,7 +172,7 @@ STATUS OnnxConvParser::Parse(const onnx::GraphProto &onnx_graph, const onnx::Nod
|
|||
attr->activationType = schema::ActivationType_NO_ACTIVATION;
|
||||
}
|
||||
|
||||
if (attr->group > kSingleGrounp && attr->group == attr->channelIn) {
|
||||
if (attr->group > kSingleGroup && attr->group == attr->channelIn) {
|
||||
if (!ParseGroupConvolution(attr, op)) {
|
||||
MS_LOG(ERROR) << "Convert Convolution to Depthwise failed";
|
||||
return RET_ERROR;
|
||||
|
|
Loading…
Reference in New Issue