forked from mindspore-Ecosystem/mindspore
!6940 scale with relu fusion and calculating
Merge pull request !6940 from zhaozhenlong/lite/op/scale_relu
This commit is contained in:
commit 649cee8ffc
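The changes below add fused Scale + ReLU / ReLU6 kernels and thread the activation type from the schema down to the CPU kernel. As a plain-C reference for what the fused operator computes, here is a minimal sketch (not the committed NEON code; the function name and the `act` flag are illustrative):

static void ScaleActReference(const float *in, float *out, const float *scale, const float *offset,
                              int outer_size, int axis_size, int inner_size, int act) {
  /* act: 0 = no activation, 1 = ReLU, 2 = ReLU6; the layout is outer x axis x inner,
     matching outer_size_/axis_size_/inner_size_ in ScaleParameter. */
  for (int o = 0; o < outer_size; ++o) {
    for (int a = 0; a < axis_size; ++a) {
      int base = (o * axis_size + a) * inner_size;
      for (int i = 0; i < inner_size; ++i) {
        float v = in[base + i] * scale[a] + offset[a];
        if (act >= 1 && v < 0.0f) v = 0.0f; /* ReLU and ReLU6 clamp below at 0 */
        if (act == 2 && v > 6.0f) v = 6.0f; /* ReLU6 additionally clamps above at 6 */
        out[base + i] = v;
      }
    }
  }
}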
@@ -77,3 +77,147 @@ void DoScale(float *in_data, float *out_data, float *scale, float *offset, int t
                scale_param->inner_size_);
   }
 }
+
+void ScaleInnerRelu(float *in_data, float *out_data, float *scale, float *offset, int outer_start, int outer_end,
+                    int axis_size, int inner_size) {
+#ifdef ENABLE_ARM64
+  float32x4_t zeros = {0, 0, 0, 0};
+#endif
+  for (int out = outer_start; out < outer_end; out++) {
+    int out_offset = out * axis_size * inner_size;
+    for (int i = 0; i < axis_size; i++) {
+      int axis_offset = out_offset + i * inner_size;
+      int in_index = 0;
+#ifdef ENABLE_ARM64
+      for (; in_index < inner_size - 4; in_index += 4) {
+        int in_offset = axis_offset + in_index;
+        float32x4_t data = vld1q_f32(in_data + in_offset);
+        float32x4_t scale_4 = vdupq_n_f32(scale[i]);
+        float32x4_t offset_4 = vdupq_n_f32(offset[i]);
+        float32x4_t tmp = vfmaq_f32(offset_4, data, scale_4);
+        float32x4_t result = vmaxq_f32(tmp, zeros);
+        vst1q_f32(out_data + in_offset, result);
+      }
+#endif
+      for (; in_index < inner_size; in_index++) {
+        int in_offset = axis_offset + in_index;
+        float tmp = in_data[in_offset] * scale[i] + offset[i];
+        out_data[in_offset] = tmp > 0.0f ? tmp : 0.0f;
+      }
+    }
+  }
+}
+
+void ScaleAxisRelu(float *in_data, float *out_data, float *scale, float *offset, int outer_start, int outer_end,
+                   int axis_size) {
+#ifdef ENABLE_ARM64
+  float32x4_t zeros = {0, 0, 0, 0};
+#endif
+  for (int out = outer_start; out < outer_end; out++) {
+    int out_offset = out * axis_size;
+    int index = 0;
+#ifdef ENABLE_ARM64
+    for (; index < axis_size - 4; index += 4) {
+      int in_offset = out_offset + index;
+      float32x4_t data = vld1q_f32(in_data + in_offset);
+      float32x4_t scale_4 = vld1q_f32(scale + index);
+      float32x4_t offset_4 = vld1q_f32(offset + index);
+      float32x4_t tmp = vfmaq_f32(offset_4, data, scale_4);
+      float32x4_t result = vmaxq_f32(tmp, zeros);
+      vst1q_f32(out_data + in_offset, result);
+    }
+#endif
+    for (; index < axis_size; index++) {
+      int in_offset = out_offset + index;
+      float tmp = in_data[in_offset] * scale[index] + offset[index];
+      out_data[in_offset] = tmp > 0.0f ? tmp : 0.0f;
+    }
+  }
+}
+
+void DoScaleRelu(float *in_data, float *out_data, float *scale, float *offset, int task_id,
+                 ScaleParameter *scale_param) {
+  int outer_step = UP_DIV(scale_param->outer_size_, scale_param->op_parameter_.thread_num_);
+  int outer_start = task_id * outer_step;
+  int outer_end = MSMIN(outer_start + outer_step, scale_param->outer_size_);
+
+  if (scale_param->inner_size_ == 1) {
+    ScaleAxisRelu(in_data, out_data, scale, offset, outer_start, outer_end, scale_param->axis_size_);
+  } else {
+    ScaleInnerRelu(in_data, out_data, scale, offset, outer_start, outer_end, scale_param->axis_size_,
+                   scale_param->inner_size_);
+  }
+}
+
+void ScaleInnerRelu6(float *in_data, float *out_data, float *scale, float *offset, int outer_start, int outer_end,
+                     int axis_size, int inner_size) {
+#ifdef ENABLE_ARM64
+  float32x4_t zeros = {0, 0, 0, 0};
+  float32x4_t bounds = {6, 6, 6, 6};
+#endif
+  for (int out = outer_start; out < outer_end; out++) {
+    int out_offset = out * axis_size * inner_size;
+    for (int i = 0; i < axis_size; i++) {
+      int axis_offset = out_offset + i * inner_size;
+      int in_index = 0;
+#ifdef ENABLE_ARM64
+      for (; in_index < inner_size - 4; in_index += 4) {
+        int in_offset = axis_offset + in_index;
+        float32x4_t data = vld1q_f32(in_data + in_offset);
+        float32x4_t scale_4 = vdupq_n_f32(scale[i]);
+        float32x4_t offset_4 = vdupq_n_f32(offset[i]);
+        float32x4_t tmp = vfmaq_f32(offset_4, data, scale_4);
+        float32x4_t result = vminq_f32(vmaxq_f32(tmp, zeros), bounds);
+        vst1q_f32(out_data + in_offset, result);
+      }
+#endif
+      for (; in_index < inner_size; in_index++) {
+        int in_offset = axis_offset + in_index;
+        float tmp = in_data[in_offset] * scale[i] + offset[i];
+        out_data[in_offset] = MSMIN(MSMAX(tmp, 0.0f), 6.0f);
+      }
+    }
+  }
+}
+
+void ScaleAxisRelu6(float *in_data, float *out_data, float *scale, float *offset, int outer_start, int outer_end,
+                    int axis_size) {
+#ifdef ENABLE_ARM64
+  float32x4_t zeros = {0, 0, 0, 0};
+  float32x4_t bounds = {6, 6, 6, 6};
+#endif
+  for (int out = outer_start; out < outer_end; out++) {
+    int out_offset = out * axis_size;
+    int index = 0;
+#ifdef ENABLE_ARM64
+    for (; index < axis_size - 4; index += 4) {
+      int in_offset = out_offset + index;
+      float32x4_t data = vld1q_f32(in_data + in_offset);
+      float32x4_t scale_4 = vld1q_f32(scale + index);
+      float32x4_t offset_4 = vld1q_f32(offset + index);
+      float32x4_t tmp = vfmaq_f32(offset_4, data, scale_4);
+      float32x4_t result = vminq_f32(vmaxq_f32(tmp, zeros), bounds);
+      vst1q_f32(out_data + in_offset, result);
+    }
+#endif
+    for (; index < axis_size; index++) {
+      int in_offset = out_offset + index;
+      float tmp = in_data[in_offset] * scale[index] + offset[index];
+      out_data[in_offset] = MSMIN(MSMAX(tmp, 0.0f), 6.0f);
+    }
+  }
+}
+
+void DoScaleRelu6(float *in_data, float *out_data, float *scale, float *offset, int task_id,
+                  ScaleParameter *scale_param) {
+  int outer_step = UP_DIV(scale_param->outer_size_, scale_param->op_parameter_.thread_num_);
+  int outer_start = task_id * outer_step;
+  int outer_end = MSMIN(outer_start + outer_step, scale_param->outer_size_);
+
+  if (scale_param->inner_size_ == 1) {
+    ScaleAxisRelu6(in_data, out_data, scale, offset, outer_start, outer_end, scale_param->axis_size_);
+  } else {
+    ScaleInnerRelu6(in_data, out_data, scale, offset, outer_start, outer_end, scale_param->axis_size_,
+                    scale_param->inner_size_);
+  }
+}
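For context on how the new entry points are meant to be driven: DoScaleRelu and DoScaleRelu6 are called once per task_id, and each call derives its slice of outer_size_ via UP_DIV(outer_size_, thread_num_). A minimal single-threaded usage sketch follows (assumed setup only; in the runtime these calls are issued through the thread pool and the parameter fields are filled by the CPU kernel rather than by hand):

#include "nnacl/fp32/scale.h"

void RunScaleReluExample(void) {
  /* Same data as the ScaleRelu unit test later in this commit: NHWC {1, 2, 2, 3}, per-channel scale/offset. */
  float in[12] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11};
  float out[12] = {0};
  float scale[3] = {1.0f, 2.0f, 3.0f};
  float offset[3] = {-5.0f, -5.0f, -5.0f};

  ScaleParameter param = {0};
  param.outer_size_ = 4;   /* 1 * 2 * 2, the dimensions before the scale axis */
  param.axis_size_ = 3;    /* the channel axis being scaled */
  param.inner_size_ = 1;   /* nothing after the axis, so the Axis variant is taken */
  param.op_parameter_.thread_num_ = 2;

  for (int task_id = 0; task_id < param.op_parameter_.thread_num_; ++task_id) {
    DoScaleRelu(in, out, scale, offset, task_id, &param);
  }
}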
@@ -23,6 +23,10 @@
 extern "C" {
 #endif
 void DoScale(float *in_data, float *out_data, float *scale, float *offset, int task_id, ScaleParameter *scale_param);
+void DoScaleRelu(float *in_data, float *out_data, float *scale, float *offset, int task_id,
+                 ScaleParameter *scale_param);
+void DoScaleRelu6(float *in_data, float *out_data, float *scale, float *offset, int task_id,
+                  ScaleParameter *scale_param);
 #ifdef __cplusplus
 }
 #endif
@@ -33,6 +33,7 @@ typedef struct ScaleParameter {
   int scale_zp_;
   int offset_zp_;
   int output_zp_;
+  int activation_type_;
 } ScaleParameter;
 
 #endif // MINDSPORE_LITE_NNACL_SCALE_H_
@@ -416,6 +416,7 @@ table BNGradInput {
 }
 table Scale {
     axis: int;
+    activationType: ActivationType = 0;
 }
 
 table Eltwise {
@@ -20,12 +20,16 @@ namespace mindspore {
 namespace lite {
 #ifdef PRIMITIVE_WRITEABLE
 int Scale::GetAxis() const { return this->primitive_->value.AsScale()->axis; }
 
 void Scale::SetAxis(int axis) { this->primitive_->value.AsScale()->axis = axis; }
+int Scale::GetActivationType() const { return this->primitive_->value.AsScale()->activationType; }
+void Scale::SetActivationType(int activation_type) {
+  this->primitive_->value.AsScale()->activationType = (schema::ActivationType)activation_type;
+}
 
 #else
 
 int Scale::GetAxis() const { return this->primitive_->value_as_Scale()->axis(); }
+int Scale::GetActivationType() const { return this->primitive_->value_as_Scale()->activationType(); }
 int Scale::UnPackToFlatBuilder(const schema::Primitive *primitive, flatbuffers::FlatBufferBuilder *fbb) {
   MS_ASSERT(nullptr != primitive);
   MS_ASSERT(nullptr != fbb);
@@ -34,7 +38,7 @@ int Scale::UnPackToFlatBuilder(const schema::Primitive *primitive, flatbuffers::
     MS_LOG(ERROR) << "value_as_Scale return nullptr";
     return RET_ERROR;
   }
-  auto val_offset = schema::CreateScale(*fbb, attr->axis());
+  auto val_offset = schema::CreateScale(*fbb, attr->axis(), attr->activationType());
   auto prim_offset = schema::CreatePrimitive(*fbb, schema::PrimitiveType_Scale, val_offset.o);
   fbb->Finish(prim_offset);
   return RET_OK;
@@ -32,6 +32,7 @@ class Scale : public PrimitiveC {
   Scale() = default;
   explicit Scale(schema::PrimitiveT *primitive) : PrimitiveC(primitive) {}
   void SetAxis(int axis);
+  void SetActivationType(int activation_type);
 
 #else
   Scale() = default;
@@ -39,6 +40,7 @@ class Scale : public PrimitiveC {
   int UnPackToFlatBuilder(const schema::Primitive *primitive, flatbuffers::FlatBufferBuilder *fbb) override;
 #endif
   int GetAxis() const;
+  int GetActivationType() const;
 };
 } // namespace lite
 } // namespace mindspore
@@ -943,6 +943,7 @@ OpParameter *PopulateScaleParameter(const mindspore::lite::PrimitiveC *primitive
   scale_param->op_parameter_.type_ = primitive->Type();
   auto param = reinterpret_cast<mindspore::lite::Scale *>(const_cast<mindspore::lite::PrimitiveC *>(primitive));
   scale_param->axis_ = param->GetAxis();
+  scale_param->activation_type_ = param->GetActivationType();
   return reinterpret_cast<OpParameter *>(scale_param);
 }
 
@@ -35,52 +35,56 @@ ScaleCPUKernel::~ScaleCPUKernel() {
       scale_ = nullptr;
     }
   }
-  if (offset_ != nullptr) {
-    free(offset_);
-    offset_ = nullptr;
+  if (scale_param_->const_offset_) {
+    if (offset_ != nullptr) {
+      free(offset_);
+      offset_ = nullptr;
+    }
   }
 }
 
 int ScaleCPUKernel::InitScaleOffset() {
   auto scale_tensor = in_tensors_.at(1);
-  float *scale_ptr = reinterpret_cast<float *>(in_tensors_.at(1)->data_c());
-  if (scale_ptr != nullptr) {
+  if (reinterpret_cast<float *>(scale_tensor->data_c()) != nullptr) {
     scale_param_->const_scale_ = true;
-    if (scale_ != nullptr) {
-      free(scale_);
-      scale_ = nullptr;
-    }
     scale_ = reinterpret_cast<float *>(malloc(scale_tensor->ElementsNum() * sizeof(float)));
     if (scale_ == nullptr) {
       MS_LOG(ERROR) << "Malloc buffer failed.";
       return RET_ERROR;
     }
-    memcpy(scale_, scale_ptr, scale_tensor->ElementsNum() * sizeof(float));
+    memcpy(scale_, scale_tensor->data_c(), scale_tensor->ElementsNum() * sizeof(float));
   } else {
     scale_param_->const_scale_ = false;
    scale_ = nullptr;
   }
 
-  if (offset_ != nullptr) {
-    free(offset_);
+  if (in_tensors_.size() == 2) {
+    scale_param_->const_offset_ = true;
+    offset_ = reinterpret_cast<float *>(malloc(scale_tensor->ElementsNum() * sizeof(float)));
+    if (offset_ == nullptr) {
+      MS_LOG(ERROR) << "Malloc data failed";
+      return RET_ERROR;
+    }
+    memset(offset_, 0, scale_tensor->ElementsNum() * sizeof(float));
+  } else if (in_tensors_.size() == 3 && reinterpret_cast<float *>(in_tensors_.at(2)->data_c()) != nullptr) {
+    scale_param_->const_offset_ = true;
+    auto offset_tensor = in_tensors_.at(2);
+    MS_ASSERT(scale_tensor->ElementsNum() == offset_tensor->ElementsNum());
+    offset_ = reinterpret_cast<float *>(malloc(offset_tensor->ElementsNum() * sizeof(float)));
+    if (offset_ == nullptr) {
+      MS_LOG(ERROR) << "Malloc data failed";
+      return RET_ERROR;
+    }
+    memcpy(offset_, offset_tensor->data_c(), offset_tensor->ElementsNum() * sizeof(float));
+  } else {
+    scale_param_->const_offset_ = false;
     offset_ = nullptr;
   }
-  offset_ = reinterpret_cast<float *>(malloc(scale_param_->axis_size_ * sizeof(float)));
-  if (offset_ == nullptr) {
-    MS_LOG(ERROR) << "Malloc buffer failed.";
-    return RET_ERROR;
-  }
-  memset(offset_, 0, scale_param_->axis_size_ * sizeof(float));
-  if (in_tensors_.size() == 3) {
-    auto offset_tensor = in_tensors_.at(2);
-    if (offset_tensor->data_c() != nullptr) {
-      memcpy(offset_, offset_tensor->data_c(), offset_tensor->ElementsNum() * sizeof(float));
-    }
-  }
   return RET_OK;
 }
 
-int ScaleCPUKernel::InitParameter() {
+int ScaleCPUKernel::CalculateParameter() {
   auto in_tensor = in_tensors_.at(0);
   auto in_shape = in_tensor->shape();
   auto scale_tensor = in_tensors_.at(1);
@@ -118,32 +122,44 @@ int ScaleCPUKernel::Init() {
     MS_LOG(ERROR) << "inputs to Scale operator should be 2 or 3, but " << in_tensors_.size() << " is given.";
     return RET_ERROR;
   }
+  auto ret = InitScaleOffset();
+  if (ret != RET_OK) {
+    MS_LOG(ERROR) << "Scale fp32 InitScaleOffset failed.";
+    return RET_ERROR;
+  }
 
   if (!InferShapeDone()) {
     return RET_OK;
   }
 
   ReSize();
   return RET_OK;
 }
 
 int ScaleCPUKernel::ReSize() {
-  auto ret = InitParameter();
+  auto ret = CalculateParameter();
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "Scale fp32 InitParameter failed.";
     return RET_ERROR;
   }
 
+  ret = InitScaleOffset();
+  if (ret != RET_OK) {
+    MS_LOG(ERROR) << "Scale fp32 InitScaleOffset failed.";
+    return RET_ERROR;
+  }
   return RET_OK;
 }
 
 int ScaleCPUKernel::Scale(int task_id) {
-  DoScale(input_ptr_, output_ptr_, scale_, offset_, task_id, scale_param_);
+  switch (scale_param_->activation_type_) {
+    case schema::ActivationType_RELU6:
+      DoScaleRelu6(input_ptr_, output_ptr_, scale_, offset_, task_id, scale_param_);
+      break;
+    case schema::ActivationType_RELU:
+      DoScaleRelu(input_ptr_, output_ptr_, scale_, offset_, task_id, scale_param_);
+      break;
+    case schema::ActivationType_NO_ACTIVATION:
+      DoScale(input_ptr_, output_ptr_, scale_, offset_, task_id, scale_param_);
+      break;
+    default:
+      MS_LOG(ERROR) << "Scale does not support activation type " << scale_param_->activation_type_;
+      return RET_ERROR;
+  }
   return RET_OK;
 }
 
@@ -164,14 +180,15 @@ int ScaleCPUKernel::Run() {
     return ret;
   }
   auto in_tensor = in_tensors_.front();
-  input_ptr_ = reinterpret_cast<float *>(in_tensor->MutableData());
-  if (scale_ == nullptr) {
+  input_ptr_ = reinterpret_cast<float *>(in_tensor->data_c());
+  if (!scale_param_->const_scale_) {
     auto scale_tensor = in_tensors_[1];
-    scale_ = reinterpret_cast<float *>(scale_tensor->MutableData());
+    scale_ = reinterpret_cast<float *>(scale_tensor->data_c());
   }
-  if (offset_ == nullptr) {
+  if (!scale_param_->const_offset_) {
+    MS_ASSERT(in_tensors_.size() == 3);
     auto offset_tensor = in_tensors_.at(2);
-    memcpy(offset_, offset_tensor->data_c(), offset_tensor->ElementsNum() * sizeof(float));
+    offset_ = reinterpret_cast<float *>(offset_tensor->data_c());
   }
   auto out_tensor = out_tensors_.front();
   output_ptr_ = reinterpret_cast<float *>(out_tensor->MutableData());
@@ -36,7 +36,7 @@ class ScaleCPUKernel : public LiteKernel {
   int Init() override;
   int ReSize() override;
   int Run() override;
-  int InitParameter();
+  int CalculateParameter();
   int InitScaleOffset();
   int Scale(int task_id);
 
@@ -0,0 +1,160 @@
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <vector>
+#include "mindspore/lite/src/lite_kernel.h"
+#include "mindspore/lite/src/tensor.h"
+#include "common/common_test.h"
+#include "nnacl/pad_parameter.h"
+#include "mindspore/lite/src/kernel_registry.h"
+#include "mindspore/lite/schema/ops_generated.h"
+#include "nnacl/fp32/scale.h"
+
+using mindspore::schema::ActivationType;
+using mindspore::schema::ActivationType_NO_ACTIVATION;
+using mindspore::schema::ActivationType_RELU;
+using mindspore::schema::ActivationType_RELU6;
+using mindspore::schema::Format_NHWC;
+namespace mindspore {
+
+class TestScaleFp32 : public mindspore::CommonTest {
+ public:
+  TestScaleFp32() = default;
+  void Prepare(const std::vector<int> &input_shape, const std::vector<int> &scale_shape,
+               const std::vector<int> &offset_shape, const std::vector<int> &output_shape, float *input_data,
+               float *scale_data, float *offset_data, float *output_data, int axis, ActivationType act_type,
+               const int thread_num);
+
+  void TearDown() override;
+
+ public:
+  float err_tol = 1e-5;
+  lite::Tensor in_tensor_;
+  lite::Tensor scale_tensor_;
+  lite::Tensor offset_tensor_;
+  lite::Tensor out_tensor_;
+  ScaleParameter param_;
+  std::vector<lite::Tensor *> inputs_{&in_tensor_, &scale_tensor_, &offset_tensor_};
+  std::vector<lite::Tensor *> outputs_{&out_tensor_};
+  kernel::KernelKey desc_ = {kernel::KERNEL_ARCH::kCPU, kNumberTypeFloat32, schema::PrimitiveType_Scale};
+  lite::InnerContext ctx_ = lite::InnerContext();
+  kernel::KernelCreator creator_ = nullptr;
+  kernel::LiteKernel *kernel_ = nullptr;
+};
+
+void TestScaleFp32::TearDown() {
+  in_tensor_.SetData(nullptr);
+  scale_tensor_.SetData(nullptr);
+  offset_tensor_.SetData(nullptr);
+  out_tensor_.SetData(nullptr);
+}
+
+void TestScaleFp32::Prepare(const std::vector<int> &input_shape, const std::vector<int> &scale_shape,
+                            const std::vector<int> &offset_shape, const std::vector<int> &output_shape,
+                            float *input_data, float *scale_data, float *offset_data, float *output_data, int axis,
+                            ActivationType act_type, const int thread_num) {
+  in_tensor_.set_data_type(kNumberTypeFloat32);
+  in_tensor_.SetFormat(Format_NHWC);
+  in_tensor_.set_shape(input_shape);
+  scale_tensor_.set_data_type(kNumberTypeFloat32);
+  scale_tensor_.SetFormat(Format_NHWC);
+  scale_tensor_.set_shape(scale_shape);
+  offset_tensor_.set_data_type(kNumberTypeFloat32);
+  offset_tensor_.SetFormat(Format_NHWC);
+  offset_tensor_.set_shape(offset_shape);
+  out_tensor_.set_data_type(kNumberTypeFloat32);
+  out_tensor_.set_shape(output_shape);
+
+  in_tensor_.SetData(input_data);
+  scale_tensor_.SetData(scale_data);
+  offset_tensor_.SetData(offset_data);
+  out_tensor_.SetData(output_data);
+
+  param_.activation_type_ = act_type;
+  param_.axis_ = axis;
+  ctx_ = lite::InnerContext();
+  ctx_.thread_num_ = thread_num;
+  ctx_.Init();
+  creator_ = lite::KernelRegistry::GetInstance()->GetCreator(desc_);
+  ASSERT_NE(creator_, nullptr);
+  kernel_ = creator_(inputs_, outputs_, reinterpret_cast<OpParameter *>(&param_), &ctx_, desc_, nullptr);
+  ASSERT_NE(kernel_, nullptr);
+}
+
+TEST_F(TestScaleFp32, ScaleNoAct) {
+  std::vector<int> input_shape{1, 2, 2, 3};
+  std::vector<int> scale_shape{3};
+  std::vector<int> offset_shape{3};
+  std::vector<int> output_shape{1, 2, 2, 3};
+  float in_data[12] = {0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0};
+  float scale_data[3] = {1.0, 2.0, 3.0};
+  float offset_data[3] = {1.0, 1.0, 1.0};
+  float out_data[12] = {0};
+  int axis = -1;
+  int thread_num = 2;
+  Prepare(input_shape, scale_shape, offset_shape, output_shape, in_data, scale_data, offset_data, out_data, axis,
+          ActivationType_NO_ACTIVATION, thread_num);
+
+  auto ret = kernel_->Run();
+  EXPECT_EQ(0, ret);
+
+  std::vector<float> expect{1.0, 3.0, 7.0, 4.0, 9.0, 16.0, 7.0, 15.0, 25.0, 10.0, 21.0, 34.0};
+
+  CompareOutputData(out_data, expect.data(), 12, err_tol);
+}
+
+TEST_F(TestScaleFp32, ScaleRelu) {
+  std::vector<int> input_shape{1, 2, 2, 3};
+  std::vector<int> scale_shape{3};
+  std::vector<int> offset_shape{3};
+  std::vector<int> output_shape{1, 2, 2, 3};
+  float in_data[12] = {0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0};
+  float scale_data[3] = {1.0, 2.0, 3.0};
+  float offset_data[3] = {-5.0, -5.0, -5.0};
+  float out_data[12] = {0};
+  int axis = -1;
+  int thread_num = 2;
+  Prepare(input_shape, scale_shape, offset_shape, output_shape, in_data, scale_data, offset_data, out_data, axis,
+          ActivationType_RELU, thread_num);
+
+  auto ret = kernel_->Run();
+  EXPECT_EQ(0, ret);
+
+  std::vector<float> expect{0.0, 0.0, 1.0, 0.0, 3.0, 10.0, 1.0, 9.0, 19.0, 4.0, 15.0, 28.0};
+
+  CompareOutputData(out_data, expect.data(), 12, err_tol);
+}
+TEST_F(TestScaleFp32, ScaleRelu6) {
+  std::vector<int> input_shape{1, 2, 2, 3};
+  std::vector<int> scale_shape{3};
+  std::vector<int> offset_shape{3};
+  std::vector<int> output_shape{1, 2, 2, 3};
+  float in_data[12] = {0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0};
+  float scale_data[3] = {1.0, 2.0, 3.0};
+  float offset_data[3] = {-5.0, -5.0, -5.0};
+  float out_data[12] = {0};
+  int axis = -1;
+  int thread_num = 2;
+  Prepare(input_shape, scale_shape, offset_shape, output_shape, in_data, scale_data, offset_data, out_data, axis,
+          ActivationType_RELU6, thread_num);
+
+  auto ret = kernel_->Run();
+  EXPECT_EQ(0, ret);
+
+  std::vector<float> expect{0.0, 0.0, 1.0, 0.0, 3.0, 6.0, 1.0, 6.0, 6.0, 4.0, 6.0, 6.0};
+
+  CompareOutputData(out_data, expect.data(), 12, err_tol);
+}
+} // namespace mindspore
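As a quick sanity check on the ScaleRelu6 expectations above: with scale {1.0, 2.0, 3.0} and offset {-5.0, -5.0, -5.0} applied along the last axis, in_data[1] = 1.0 falls in channel 1, so 1.0 * 2.0 - 5.0 = -3.0 is clamped up to 0.0, while in_data[5] = 5.0 falls in channel 2, so 5.0 * 3.0 - 5.0 = 10.0 is clamped down to 6.0; the remaining entries of the expect vector follow the same per-channel arithmetic.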
@@ -144,23 +144,26 @@ STATUS MulAddFusionPass::AddNewScaleNode(MetaGraphT *graph, const std::unique_pt
   // NHWC
   int shape_size = graph->allTensors.at(addBiasIndex)->dims.size();
   scaleParam->axis = 0 - shape_size;
-  mulNode->primitive->value.value = scaleParam.release();
   mulNode->inputIndex.push_back(addBiasIndex);
-  if (addNode->primitive->value.AsAdd()->activationType != ActivationType_NO_ACTIVATION) {
+  auto activationType = addNode->primitive->value.AsAdd()->activationType;
+  if (activationType == ActivationType_RELU || activationType == ActivationType_RELU6 ||
+      activationType == ActivationType_NO_ACTIVATION) {
+    // delete addnode
+    scaleParam->activationType = activationType;
+    auto status = IsolateOneWayNode(graph, addNode);
+    if (status != RET_OK) {
+      MS_LOG(ERROR) << "IsolateOneWayNode failed";
+      return status;
+    }
+  } else {
     // repace addnode as activation
     std::unique_ptr<ActivationT> activationParam(new ActivationT());
     activationParam->type = addNode->primitive->value.AsAdd()->activationType;
     addNode->primitive->value.type = schema::PrimitiveType_Activation;
     addNode->primitive->value.value = activationParam.release();
     addNode->inputIndex.pop_back();
-    return RET_OK;
   }
-  // delete addnode
-  auto status = IsolateOneWayNode(graph, addNode);
-  if (status != RET_OK) {
-    MS_LOG(ERROR) << "IsolateOneWayNode failed";
-    return status;
-  }
+  mulNode->primitive->value.value = scaleParam.release();
   return RET_OK;
 }
 } // namespace lite