diff --git a/mindspore/lite/src/populate_parameter.cc b/mindspore/lite/src/populate_parameter.cc
index 87c099e6848..28ae45f4338 100644
--- a/mindspore/lite/src/populate_parameter.cc
+++ b/mindspore/lite/src/populate_parameter.cc
@@ -929,6 +929,7 @@ CropParameter *PopulateCropParameter(const lite::Primitive *primitive) {
     return nullptr;
   }
   parameter->axis_ = param->axis();
+  parameter->offset_size_ = param_offset->size();
   for (int i = 0; i < param_offset->size(); ++i) {
     parameter->offset_[i] = param_offset->Get(i);
   }
diff --git a/mindspore/lite/src/runtime/kernel/arm/base/crop_base.cc b/mindspore/lite/src/runtime/kernel/arm/base/crop_base.cc
new file mode 100644
index 00000000000..9f66feb2083
--- /dev/null
+++ b/mindspore/lite/src/runtime/kernel/arm/base/crop_base.cc
@@ -0,0 +1,95 @@
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "src/runtime/kernel/arm/base/crop_base.h"
+#include <vector>
+#include "src/runtime/kernel/arm/int8/crop_int8.h"
+#include "src/runtime/kernel/arm/fp32/crop.h"
+#include "schema/model_generated.h"
+#include "src/kernel_factory.h"
+#include "include/errorcode.h"
+#include "include/context.h"
+
+using mindspore::lite::KernelRegistrar;
+using mindspore::lite::RET_ERROR;
+using mindspore::lite::RET_OK;
+using mindspore::schema::PrimitiveType_Crop;
+
+namespace mindspore::kernel {
+// The base Init() has nothing to set up; CropInt8CPUKernel and CropCPUKernel override it.
+int CropBaseCPUKernel::Init() { return RET_OK; }
+
+namespace {
+// Shared body of the three Crop creators below: validates the arguments, constructs a KernelT and
+// runs its Init(). Returns the ready kernel, or nullptr (after logging) if anything fails.
+template <typename KernelT>
+kernel::LiteKernel *CreateCropKernel(const std::vector<lite::tensor::Tensor *> &inputs,
+                                     const std::vector<lite::tensor::Tensor *> &outputs, OpParameter *opParameter,
+                                     const Context *ctx, const kernel::KernelKey &desc) {
+  if (opParameter == nullptr) {
+    MS_LOG(ERROR) << "Input opParameter is nullptr!";
+    return nullptr;
+  }
+  // The kernel constructors dereference ctx (ctx->threadNum), so a null context has to be rejected here.
+  if (ctx == nullptr) {
+    MS_LOG(ERROR) << "Input context is nullptr!";
+    return nullptr;
+  }
+  MS_ASSERT(desc.type == schema::PrimitiveType_Crop);
+  auto *kernel = new (std::nothrow) KernelT(opParameter, inputs, outputs, ctx);
+  if (kernel == nullptr) {
+    MS_LOG(ERROR) << "new CropCPUKernel fail!";
+    return nullptr;
+  }
+  auto ret = kernel->Init();
+  if (ret != RET_OK) {
+    // Log before deleting: whether the kernel destructor releases opParameter is not visible from this file.
+    MS_LOG(ERROR) << "Init kernel failed, name: " << opParameter->name_ << ", type: "
+                  << schema::EnumNamePrimitiveType(static_cast<schema::PrimitiveType>(opParameter->type_));
+    delete kernel;
+    return nullptr;
+  }
+  return kernel;
+}
+}  // namespace
+
+// Creator registered for int8 Crop: builds a CropInt8CPUKernel.
+kernel::LiteKernel *CpuCropInt8KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs,
+                                             const std::vector<lite::tensor::Tensor *> &outputs,
+                                             OpParameter *opParameter, const Context *ctx,
+                                             const kernel::KernelKey &desc) {
+  return CreateCropKernel<CropInt8CPUKernel>(inputs, outputs, opParameter, ctx, desc);
+}
+
+// Creator registered for int32 Crop: builds a CropCPUKernel (the same kernel class as fp32).
+kernel::LiteKernel *CpuCropInt32KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs,
+                                              const std::vector<lite::tensor::Tensor *> &outputs,
+                                              OpParameter *opParameter, const Context *ctx,
+                                              const kernel::KernelKey &desc) {
+  return CreateCropKernel<CropCPUKernel>(inputs, outputs, opParameter, ctx, desc);
+}
+
+// Creator registered for fp32 Crop: builds a CropCPUKernel.
+kernel::LiteKernel *CpuCropFp32KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs,
+                                             const std::vector<lite::tensor::Tensor *> &outputs,
+                                             OpParameter *opParameter, const Context *ctx,
+                                             const kernel::KernelKey &desc) {
+  return CreateCropKernel<CropCPUKernel>(inputs, outputs, opParameter, ctx, desc);
+}
+
+REG_KERNEL(kCPU, kNumberTypeInt8, PrimitiveType_Crop, CpuCropInt8KernelCreator)
+REG_KERNEL(kCPU, kNumberTypeInt32, PrimitiveType_Crop, CpuCropInt32KernelCreator)
+REG_KERNEL(kCPU, kNumberTypeFloat32, PrimitiveType_Crop, CpuCropFp32KernelCreator)
+}  // namespace mindspore::kernel
diff --git a/mindspore/lite/src/runtime/kernel/arm/base/crop_base.h b/mindspore/lite/src/runtime/kernel/arm/base/crop_base.h
new file mode 100644
index 00000000000..883fe36405f
--- /dev/null
+++ b/mindspore/lite/src/runtime/kernel/arm/base/crop_base.h
@@ -0,0 +1,46 @@
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_BASE_CROP_BASE_H_
+#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_BASE_CROP_BASE_H_
+
+#include <vector>
+#include "src/lite_kernel.h"
+#include "src/runtime/kernel/arm/opclib/crop_parameter.h"
+
+using mindspore::lite::Context;
+
+namespace mindspore::kernel {
+// Base of the CPU Crop kernels: keeps the context and its thread count for subclasses. ctx must not be null.
+class CropBaseCPUKernel : public LiteKernel {
+ public:
+  CropBaseCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs,
+                    const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx)
+      : LiteKernel(parameter, inputs, outputs), thread_count_(ctx->threadNum), ctx_(ctx) {
+    opParameter->thread_num_ = ctx->threadNum;  // the op parameter carries the same thread count
+  }
+  ~CropBaseCPUKernel() = default;
+  int Init() override;
+  int ReSize() override { return 0; }
+  int Run() override { return 0; }
+
+ protected:
+  int thread_count_;    // ctx->threadNum captured at construction; initialised first (declaration order)
+  const Context *ctx_;  // the context pointer passed to the constructor
+};
+}  // namespace mindspore::kernel
+
+#endif  // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_BASE_CROP_BASE_H_
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/crop.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/crop.cc
index a2f8389b848..8fc811a6e32 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/crop.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/crop.cc
@@ -17,6 +17,7 @@
 #include "schema/model_generated.h"
 #include "src/kernel_registry.h"
 #include "src/runtime/kernel/arm/opclib/fp32/crop.h"
+#include "src/runtime/kernel/arm/opclib/crop_parameter.h"
 #include "include/errorcode.h"
 #include "src/runtime/runtime_api.h"
 
@@ -77,36 +78,4 @@ int CropCPUKernel::Run() {
   }
   return RET_OK;
 }
-
-kernel::LiteKernel *CpuCropFp32KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs,
-                                             const std::vector<lite::tensor::Tensor *> &outputs,
-                                             OpParameter *op_parameter, const lite::Context *ctx,
-                                             const kernel::KernelKey &desc) {
-  if (op_parameter == nullptr) {
-    MS_LOG(ERROR) << "Input op_parameter is nullptr!";
-    return nullptr;
-  }
-  if (ctx == nullptr) {
-    MS_LOG(ERROR) << "Input context is nullptr!";
-    return nullptr;
-  }
-  MS_ASSERT(desc.type == schema::PrimitiveType_Crop);
-  op_parameter->thread_num_ = ctx->threadNum;
-  auto *kernel = new (std::nothrow) CropCPUKernel(op_parameter, inputs, outputs);
-  if (kernel == nullptr) {
-    MS_LOG(ERROR) << "new CropCPUKernel fail!";
-    return nullptr;
-  }
-
-  auto ret = kernel->Init();
-  if (ret != RET_OK) {
-    delete kernel;
-    MS_LOG(ERROR) << "Init kernel failed, name: " << op_parameter->name_ << ", type: "
-                  << schema::EnumNamePrimitiveType(static_cast<schema::PrimitiveType>(op_parameter->type_));
-    return nullptr;
-  }
-  return kernel;
-}
-
-REG_KERNEL(kCPU, kNumberTypeFloat32, PrimitiveType_Crop, CpuCropFp32KernelCreator)
 }  // namespace mindspore::kernel
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/crop.h b/mindspore/lite/src/runtime/kernel/arm/fp32/crop.h
index 05dbded21fa..f9656b23553 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/crop.h
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/crop.h
@@ -18,12 +18,14 @@
 #include <vector>
 #include "src/lite_kernel.h"
 #include "src/runtime/kernel/arm/base/layout_transform.h"
+#include "src/runtime/kernel/arm/base/crop_base.h"
 
 namespace mindspore::kernel {
-class CropCPUKernel : public LiteKernel {
+class CropCPUKernel : public CropBaseCPUKernel {
  public:
   CropCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs,
-                const std::vector<lite::tensor::Tensor *> &outputs) : LiteKernel(parameter, inputs, outputs) {}
+                const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx)
+      : CropBaseCPUKernel(parameter, inputs, outputs, ctx) {}
   ~CropCPUKernel() = default;
   int Init() override;
   int ReSize() override { return 0; }
diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/crop_int8.cc
b/mindspore/lite/src/runtime/kernel/arm/int8/crop_int8.cc
new file mode 100644
index 00000000000..8ec38d849cb
--- /dev/null
+++ b/mindspore/lite/src/runtime/kernel/arm/int8/crop_int8.cc
@@ -0,0 +1,122 @@
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "src/runtime/kernel/arm/int8/crop_int8.h"
+#include <limits>
+#include "src/runtime/kernel/arm/opclib/int8/crop_int8.h"
+#include "include/errorcode.h"
+#include "src/runtime/runtime_api.h"
+
+using mindspore::kernel::KERNEL_ARCH::kCPU;
+using mindspore::lite::RET_ERROR;
+using mindspore::lite::RET_OK;
+
+namespace mindspore::kernel {
+
+// Fills crop_para_ from the tensors: input/output quant params, input rank, shape pointers, the int8
+// clamp range and the per-dimension offsets (PadOffset). Returns RET_ERROR when a tensor carries no
+// quant params or when the input rank exceeds CROP_OFFSET_MAX_SIZE.
+int CropInt8CPUKernel::Init() {
+  auto ret = CropBaseCPUKernel::Init();
+  if (ret != RET_OK) {
+    return ret;
+  }
+  auto *input_tensor = inputs_.at(kInputIndex);
+  auto *out_tensor = outputs_.at(kOutputIndex);
+
+  auto in_quant_args = input_tensor->GetQuantParams();
+  auto out_quant_args = out_tensor->GetQuantParams();
+  // front() on an empty container is undefined behaviour, so reject tensors without quant params.
+  if (in_quant_args.empty() || out_quant_args.empty()) {
+    MS_LOG(ERROR) << "Crop int8 needs quant params on both the input and the output tensor.";
+    return RET_ERROR;
+  }
+  crop_para_->quant_arg.in_args_.scale_ = in_quant_args.front().scale;
+  crop_para_->quant_arg.in_args_.zp_ = in_quant_args.front().zeroPoint;
+  crop_para_->quant_arg.out_args_.scale_ = out_quant_args.front().scale;
+  crop_para_->quant_arg.out_args_.zp_ = out_quant_args.front().zeroPoint;
+
+  auto input_dim = input_tensor->shape().size();
+  // PadOffset writes in_offset_[0, input_dim), which only has CROP_OFFSET_MAX_SIZE slots. Enforce the
+  // limit at run time as well, in case MS_ASSERT is compiled out (its definition is not visible here).
+  MS_ASSERT(input_dim <= CROP_OFFSET_MAX_SIZE);
+  if (input_dim > static_cast<size_t>(CROP_OFFSET_MAX_SIZE)) {
+    MS_LOG(ERROR) << "Crop int8 supports at most " << CROP_OFFSET_MAX_SIZE << " dimensions, got " << input_dim;
+    return RET_ERROR;
+  }
+  crop_para_->input_dim_ = input_dim;
+
+  // NOTE(review): these pointers are only valid while the vector returned by shape() is alive. If
+  // shape() returns by value they dangle once the statement ends - confirm against the Tensor class.
+  crop_para_->in_shape_ = input_tensor->shape().data();
+  crop_para_->out_shape_ = out_tensor->shape().data();
+
+  crop_para_->quant_arg.output_activation_max_ = std::numeric_limits<int8_t>::max();
+  crop_para_->quant_arg.output_activation_min_ = std::numeric_limits<int8_t>::min();
+
+  PadOffset(input_dim, crop_para_);
+  return RET_OK;
+}
+
+// Nothing is recomputed on resize; always returns 0.
+int CropInt8CPUKernel::ReSize() { return 0; }
+
+// Launches CropInt8Run through LiteBackendParallelLaunch with thread_count_ and returns its result.
+int CropInt8CPUKernel::Run() { return LiteBackendParallelLaunch(CropInt8Run, this, thread_count_); }
+
+// Expands the operator's offset_ list into one offset per input dimension (in_offset_). Dimensions
+// before axis_ get 0. From axis_ on, a single offset is applied to every dimension; with several
+// offsets, dimension i uses offset_[i - axis_] (0 when that entry was not provided).
+void PadOffset(int input_dim, CropParameter *crop_para) {
+  auto axis = crop_para->axis_;
+  auto offsets_size = crop_para->offset_size_;
+  MS_ASSERT(axis <= input_dim);
+  if (offsets_size > 1) {
+    MS_ASSERT(axis + offsets_size == input_dim);
+  }
+  // in_offset_ holds CROP_OFFSET_MAX_SIZE entries; never write past them.
+  const int dims = input_dim < CROP_OFFSET_MAX_SIZE ? input_dim : CROP_OFFSET_MAX_SIZE;
+  for (int i = 0; i < dims; i++) {
+    int crop_offset = 0;
+    if (i >= axis) {
+      if (offsets_size == 1) {
+        crop_offset = crop_para->offset_[0];
+      } else if (offsets_size > 1 && i - axis < offsets_size) {
+        // Bounds check: only read offset_ entries the operator actually supplied.
+        crop_offset = crop_para->offset_[i - axis];
+      }
+    }
+    crop_para->in_offset_[i] = crop_offset;
+  }
+}
+
+// Task entry point handed to LiteBackendParallelLaunch; cdata is the CropInt8CPUKernel passed by Run().
+int CropInt8Run(int task_id, LiteParallelGroupEnv *penv, void *cdata) {
+  auto crop = reinterpret_cast<CropInt8CPUKernel *>(cdata);
+  return crop->DoExecute(task_id);
+}
+
+// Crops the part of the output assigned to task_id from the input tensor's data into the output tensor's data.
+int CropInt8CPUKernel::DoExecute(int task_id) {
+  auto input_tensor = inputs_.at(kInputIndex);
+  auto out_tensor = outputs_.at(kOutputIndex);
+  int8_t *input_data = reinterpret_cast<int8_t *>(input_tensor->Data());
+  int8_t *output_data = reinterpret_cast<int8_t *>(out_tensor->Data());
+  Crop(input_data, output_data, task_id, crop_para_);
+  return RET_OK;
+}
+
+}  // namespace mindspore::kernel
diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/crop_int8.h b/mindspore/lite/src/runtime/kernel/arm/int8/crop_int8.h
new file mode 100644
index 00000000000..ebcfd22a65e
--- /dev/null
+++ b/mindspore/lite/src/runtime/kernel/arm/int8/crop_int8.h
@@ -0,0 +1,52 @@
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_INT8_CROP_INT8_H_
+#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_INT8_CROP_INT8_H_
+
+#include <vector>
+#include "src/lite_kernel.h"
+#include "include/context.h"
+#include "src/runtime/kernel/arm/base/crop_base.h"
+#include "src/runtime/runtime_api.h"
+
+using mindspore::lite::Context;
+
+namespace mindspore::kernel {
+// Int8 CPU Crop kernel: Init() fills the CropParameter, Run() launches CropInt8Run via LiteBackendParallelLaunch.
+class CropInt8CPUKernel : public CropBaseCPUKernel {
+ public:
+  CropInt8CPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs,
+                    const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx)
+      : CropBaseCPUKernel(parameter, inputs, outputs, ctx) {
+    crop_para_ = reinterpret_cast<CropParameter *>(opParameter);
+    crop_para_->thread_count_ = opParameter->thread_num_;
+  }
+  ~CropInt8CPUKernel() = default;
+  int Init() override;
+  int ReSize() override;
+  int Run() override;
+  int DoExecute(int task_id);  // crops the part of the output assigned to task_id
+
+ private:
+  CropParameter *crop_para_;  // opParameter reinterpreted as CropParameter (set in the constructor)
+};
+// Task entry point (cdata is the CropInt8CPUKernel) and the offset expansion helper called from Init().
+int CropInt8Run(int task_id, LiteParallelGroupEnv *penv, void *cdata);
+void PadOffset(int input_dim, CropParameter *crop_para);
+}  // namespace mindspore::kernel
+
+#endif  // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_INT8_CROP_INT8_H_
diff --git a/mindspore/lite/src/runtime/kernel/arm/opclib/crop_parameter.h b/mindspore/lite/src/runtime/kernel/arm/opclib/crop_parameter.h
new file mode 100644
index 00000000000..16dece28cb5
--- /dev/null
+++ b/mindspore/lite/src/runtime/kernel/arm/opclib/crop_parameter.h
@@ -0,0 +1,37 @@
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file
except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_OPCLIB_CROP_PARAMETER_H_
+#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_OPCLIB_CROP_PARAMETER_H_
+#include "src/runtime/kernel/arm/opclib/op_base.h"
+#include "src/runtime/kernel/arm/opclib/quantization/quantize.h"
+
+#define CROP_OFFSET_MAX_SIZE 4  // capacity of offset_ / in_offset_
+struct CropParameter {          // Crop operator parameters; kernels cast OpParameter * to this type
+  OpParameter op_parameter_;    // common header; kept first because of that cast
+  CropQuantArg quant_arg;       // int8 requantization data, filled by CropInt8CPUKernel::Init()
+  int thread_count_;            // number of tasks the int8 crop splits its work into
+  int thread_id_;               // NOTE(review): not used by the int8 path shown in this patch - confirm fp32 usage
+  int offset_size_;             // number of entries of offset_ supplied by the operator
+  int64_t offset_[CROP_OFFSET_MAX_SIZE];     // offsets as given by the operator, starting at axis_
+  int64_t in_offset_[CROP_OFFSET_MAX_SIZE];  // one offset per input dimension, computed by PadOffset()
+  int64_t axis_;                // first dimension that is cropped
+  const int *in_shape_;         // points at the input tensor's shape data (set in Init())
+  const int *out_shape_;        // points at the output tensor's shape data (set in Init())
+  int input_dim_;               // rank of the input tensor
+};
+
+#endif  // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_OPCLIB_CROP_PARAMETER_H_
diff --git a/mindspore/lite/src/runtime/kernel/arm/opclib/fp32/crop.cc b/mindspore/lite/src/runtime/kernel/arm/opclib/fp32/crop.cc
index 547535538e4..340c8eed2cb 100644
--- a/mindspore/lite/src/runtime/kernel/arm/opclib/fp32/crop.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/opclib/fp32/crop.cc
@@ -16,6 +16,7 @@
 #include "src/runtime/kernel/arm/opclib/fp32/crop.h"
 #include
 #include "src/runtime/kernel/arm/opclib/op_base.h"
+#include "src/runtime/kernel/arm/opclib/crop_parameter.h"
 
 void Pad4DOffset(CropParameter *crop_param, int64_t *offset) {
   int axis = crop_param->axis_;
diff --git a/mindspore/lite/src/runtime/kernel/arm/opclib/fp32/crop.h b/mindspore/lite/src/runtime/kernel/arm/opclib/fp32/crop.h
index 45cf2d934ee..b50779b1513 100644
--- a/mindspore/lite/src/runtime/kernel/arm/opclib/fp32/crop.h
+++ b/mindspore/lite/src/runtime/kernel/arm/opclib/fp32/crop.h
@@ -16,16 +16,10 @@
 #ifndef
MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_OPCLIB_FP32_CROP_H_
 #define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_OPCLIB_FP32_CROP_H_
 #include "src/runtime/kernel/arm/opclib/op_base.h"
+#include "src/runtime/kernel/arm/opclib/crop_parameter.h"
 
 #define CROP_OFFSET_MAX_SIZE 4
 
-struct CropParameter {
-  OpParameter op_parameter_;
-  int64_t offset_[CROP_OFFSET_MAX_SIZE];
-  int64_t axis_;
-  int32_t thread_id_;
-};
-
 void Crop4D(const float *input, float *output, const int *in_shape, const int *out_shape, CropParameter *crop_param);
 void Crop4DNoParallel(const float *input, float *output, const int *in_shape, const int *out_shape,
                       CropParameter *crop_param);
diff --git a/mindspore/lite/src/runtime/kernel/arm/opclib/int8/crop_int8.cc b/mindspore/lite/src/runtime/kernel/arm/opclib/int8/crop_int8.cc
new file mode 100644
index 00000000000..2b46b464164
--- /dev/null
+++ b/mindspore/lite/src/runtime/kernel/arm/opclib/int8/crop_int8.cc
@@ -0,0 +1,185 @@
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "src/runtime/kernel/arm/opclib/crop_parameter.h"
+#include "src/runtime/kernel/arm/opclib/int8/crop_int8.h"
+#include <math.h>
+#include <string.h>
+
+namespace {
+// Requantization constants, derived once per Crop call from para->quant_arg.
+struct CropRequant {
+  bool same_quant;  // input and output share scale and zero point, so rows are copied with memcpy
+  float scale;      // in_scale / out_scale
+  float bias;       // -in_zp * scale
+  int32_t out_zp;   // output zero point
+  int act_min;      // lower clamp for requantized values
+  int act_max;      // upper clamp for requantized values
+};
+
+// Reads the quantization arguments of para into a CropRequant.
+CropRequant MakeCropRequant(const CropParameter *para) {
+  const float in_scale = para->quant_arg.in_args_.scale_;
+  const int32_t in_zp = para->quant_arg.in_args_.zp_;
+  const float out_scale = para->quant_arg.out_args_.scale_;
+  const int32_t out_zp = para->quant_arg.out_args_.zp_;
+  CropRequant rq;
+  rq.same_quant = (in_scale == out_scale && in_zp == out_zp);
+  rq.scale = in_scale / out_scale;
+  rq.bias = -in_zp * rq.scale;
+  rq.out_zp = out_zp;
+  rq.act_min = para->quant_arg.output_activation_min_;
+  rq.act_max = para->quant_arg.output_activation_max_;
+  return rq;
+}
+
+// Copies count contiguous values from src to dst. When the quant params differ, every value is
+// requantized as round(v * scale + bias) + out_zp and clamped to [act_min, act_max].
+void CropCopyRow(const int8_t *src, int8_t *dst, int64_t count, const CropRequant &rq) {
+  if (rq.same_quant) {
+    memcpy(dst, src, sizeof(int8_t) * count);
+    return;
+  }
+  for (int64_t i = 0; i < count; i++) {
+    int32_t output_tmp = round(src[i] * rq.scale + rq.bias) + rq.out_zp;
+    if (output_tmp > rq.act_max) {
+      dst[i] = rq.act_max;
+    } else if (output_tmp < rq.act_min) {
+      dst[i] = rq.act_min;
+    } else {
+      dst[i] = static_cast<int8_t>(output_tmp);
+    }
+  }
+}
+
+// Number of consecutive indices of the split dimension that one task handles.
+int64_t CropTaskStride(int extent, int thread_count) {
+  return thread_count > 1 ? UP_DIV(extent, thread_count) : extent;
+}
+}  // namespace
+
+// Dispatches on para->input_dim_ to the matching CropND routine for the slice owned by task_id.
+void Crop(const int8_t *input, int8_t *output, int task_id, CropParameter *para) {
+  switch (para->input_dim_) {
+    case 1:
+      Crop1D(input, output, task_id, para);
+      break;
+    case 2:
+      Crop2D(input, output, task_id, para);
+      break;
+    case 3:
+      Crop3D(input, output, task_id, para);
+      break;
+    case 4:
+      Crop4D(input, output, task_id, para);
+      break;
+    default:
+      // Ranks outside 1..4 are not handled: nothing is written to output.
+      break;
+  }
+}
+
+// 1-D crop: the single dimension is split across tasks; each task copies one contiguous chunk.
+void Crop1D(const int8_t *input, int8_t *output, int task_id, CropParameter *para) {
+  const int out_batch = para->out_shape_[0];
+  const int64_t task_id_stride = CropTaskStride(out_batch, para->thread_count_);
+  const int64_t n = task_id * task_id_stride;
+  if (n >= out_batch) {
+    return;
+  }
+  const int64_t count = MSMIN(out_batch - n, task_id_stride);
+  CropCopyRow(input + n + para->in_offset_[0], output + n, count, MakeCropRequant(para));
+}
+
+// 2-D crop: the inner dimension is split across tasks; each task copies its chunk of every output row.
+void Crop2D(const int8_t *input, int8_t *output, int task_id, CropParameter *para) {
+  const int in_height = para->in_shape_[1];
+  const int out_batch = para->out_shape_[0];
+  const int out_height = para->out_shape_[1];
+  const int64_t task_id_stride = CropTaskStride(out_height, para->thread_count_);
+  const int64_t h = task_id * task_id_stride;
+  if (h >= out_height) {
+    return;
+  }
+  const int64_t count = MSMIN(out_height - h, task_id_stride);
+  const CropRequant rq = MakeCropRequant(para);
+  for (int n = 0; n < out_batch; n++) {
+    const int8_t *in_ptr = input + (n + para->in_offset_[0]) * in_height + h + para->in_offset_[1];
+    int8_t *out_ptr = output + n * out_height + h;
+    CropCopyRow(in_ptr, out_ptr, count, rq);
+  }
+}
+
+// 3-D crop: dimension 1 is split across tasks; each (n, h) pair copies one row of out_width values.
+void Crop3D(const int8_t *input, int8_t *output, int task_id, CropParameter *para) {
+  const int in_height = para->in_shape_[1];
+  const int in_width = para->in_shape_[2];
+  const int out_batch = para->out_shape_[0];
+  const int out_height = para->out_shape_[1];
+  const int out_width = para->out_shape_[2];
+
+  const int in_stride_h = in_width;
+  const int in_stride_n = in_stride_h * in_height;
+  const int out_stride_h = out_width;
+  const int out_stride_n = out_stride_h * out_height;
+
+  const CropRequant rq = MakeCropRequant(para);
+  const int64_t task_id_stride = CropTaskStride(out_height, para->thread_count_);
+  const int64_t h_begin = task_id * task_id_stride;
+  const int64_t h_end = MSMIN(h_begin + task_id_stride, out_height);
+  for (int n = 0; n < out_batch; n++) {
+    for (int64_t h = h_begin; h < h_end; h++) {
+      const int8_t *in_ptr =
+        input + (n + para->in_offset_[0]) * in_stride_n + (h + para->in_offset_[1]) * in_stride_h + para->in_offset_[2];
+      int8_t *out_ptr = output + n * out_stride_n + h * out_stride_h;
+      CropCopyRow(in_ptr, out_ptr, out_width, rq);
+    }
+  }
+}
+
+// 4-D crop: dimension 1 is split across tasks; each (n, h, w) triple copies out_channel values.
+void Crop4D(const int8_t *input, int8_t *output, int task_id, CropParameter *para) {
+  const int in_height = para->in_shape_[1];
+  const int in_width = para->in_shape_[2];
+  const int in_channel = para->in_shape_[3];
+  const int out_batch = para->out_shape_[0];
+  const int out_height = para->out_shape_[1];
+  const int out_width = para->out_shape_[2];
+  const int out_channel = para->out_shape_[3];
+
+  const int in_stride_w = in_channel;
+  const int in_stride_h = in_channel * in_width;
+  const int in_stride_n = in_stride_h * in_height;
+  const int out_stride_w = out_channel;
+  const int out_stride_h = out_channel * out_width;
+  const int out_stride_n = out_stride_h * out_height;
+
+  const CropRequant rq = MakeCropRequant(para);
+  const int64_t task_id_stride = CropTaskStride(out_height, para->thread_count_);
+  const int64_t h_begin = task_id * task_id_stride;
+  const int64_t h_end = MSMIN(h_begin + task_id_stride, out_height);
+  for (int n = 0; n < out_batch; n++) {
+    for (int64_t h = h_begin; h < h_end; h++) {
+      for (int w = 0; w < out_width; w++) {
+        const int8_t *in_ptr = input + (n + para->in_offset_[0]) * in_stride_n +
+                               (h + para->in_offset_[1]) * in_stride_h + (w + para->in_offset_[2]) * in_stride_w +
+                               para->in_offset_[3];
+        int8_t *out_ptr = output + n * out_stride_n + h * out_stride_h + w * out_stride_w;
+        CropCopyRow(in_ptr, out_ptr, out_channel, rq);
+      }
+    }
+  }
+}
diff --git a/mindspore/lite/src/runtime/kernel/arm/opclib/int8/crop_int8.h b/mindspore/lite/src/runtime/kernel/arm/opclib/int8/crop_int8.h
new file mode 100644
index 00000000000..f8f69555738
--- /dev/null
+++ b/mindspore/lite/src/runtime/kernel/arm/opclib/int8/crop_int8.h
@@ -0,0 +1,28 @@
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_OPCLIB_INT8_CROP_INT8_H_
+#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_OPCLIB_INT8_CROP_INT8_H_
+#include "src/runtime/kernel/arm/opclib/op_base.h"
+#include "src/runtime/kernel/arm/opclib/crop_parameter.h"
+
+void Crop(const int8_t *input, int8_t *output, int task_id, CropParameter *para);    // dispatches on input_dim_
+void Crop1D(const int8_t *input, int8_t *output, int task_id, CropParameter *para);  // rank-1 input
+void Crop2D(const int8_t *input, int8_t *output, int task_id, CropParameter *para);  // rank-2 input
+void Crop3D(const int8_t *input, int8_t *output, int task_id, CropParameter *para);  // rank-3 input
+void Crop4D(const int8_t *input, int8_t *output, int task_id, CropParameter *para);  // rank-4 input
+
+#endif  // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_OPCLIB_INT8_CROP_INT8_H_
diff --git a/mindspore/lite/src/runtime/kernel/arm/opclib/quantization/quantize.h b/mindspore/lite/src/runtime/kernel/arm/opclib/quantization/quantize.h
index 79de3dc5d0b..e93d9551490 100644
--- a/mindspore/lite/src/runtime/kernel/arm/opclib/quantization/quantize.h
+++ b/mindspore/lite/src/runtime/kernel/arm/opclib/quantization/quantize.h
@@ -75,6 +75,13 @@ struct MulQuantArg {
   int shift_right_;
 };
 
+struct CropQuantArg {
+  QuantArg in_args_;           // scale and zero point of the input tensor
+  QuantArg out_args_;          // scale and zero point of the output tensor
+  int output_activation_min_;  // lower clamp applied to requantized output values
+  int output_activation_max_;  // upper clamp applied to requantized output values
+};
+
 void QuantizeMultiplier(double double_multiplier, int32_t *quantized_multiplier, int *shift);
 
 inline void QuantizeMultiplierSmallerThanOne(double double_multiplier, int32_t *quantized_multiplier,
diff --git
a/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/crop_int8_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/crop_int8_tests.cc
new file mode 100644
index 00000000000..553629d7be5
--- /dev/null
+++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/crop_int8_tests.cc
@@ -0,0 +1,186 @@
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <cstdint>
+#include <iostream>
+#include <memory>
+#include <vector>
+#include "utils/log_adapter.h"
+#include "common/common_test.h"
+#include "mindspore/lite/src/runtime/kernel/arm/opclib/crop_parameter.h"
+#include "mindspore/lite/src/kernel_registry.h"
+#include "mindspore/lite/src/lite_kernel.h"
+#include "mindspore/lite/src/ir/tensor.h"
+
+namespace mindspore {
+namespace {
+// Deleter for tensors that only borrow a caller-owned buffer. It detaches the
+// buffer with SetData(nullptr) before deleting the tensor, so the cleanup also
+// happens when a fatal assertion makes the helper return early.
+// NOTE(review): presumably Tensor's destructor would otherwise release the
+// attached buffer -- confirm against tensor.h.
+struct BorrowedTensorDeleter {
+  void operator()(lite::tensor::Tensor *tensor) const {
+    tensor->SetData(nullptr);
+    delete tensor;
+  }
+};
+using BorrowedTensorPtr = std::unique_ptr<lite::tensor::Tensor, BorrowedTensorDeleter>;
+
+// Builds the int8 sequence {1, 2, ..., count} used as input by every case.
+std::vector<int8_t> SequentialInput(int count) {
+  std::vector<int8_t> values(count);
+  for (int i = 0; i < count; ++i) {
+    values[i] = static_cast<int8_t>(i + 1);
+  }
+  return values;
+}
+}  // namespace
+
+class TestCropInt8 : public mindspore::Common {
+ public:
+  TestCropInt8() {}
+
+ protected:
+  // Runs one int8 Crop case through the kernel registered for
+  // {kCPU, kNumberTypeInt8, PrimitiveType_Crop} and compares the produced
+  // output with expect_result.
+  //
+  // input_data / input_shape: contents and shape of the single input tensor.
+  // output_shape: shape assigned to the output tensor; the output buffer holds
+  //               expect_result.size() elements.
+  // thread_num: value stored in Context::threadNum.
+  // axis / offsets: copied into CropParameter::axis_ and ::offset_;
+  //                 offset_size_ is set to offsets.size().
+  // expect_result: expected output values.
+  // output_scale: scale of the output quant arg (input scale is fixed at 1.0,
+  //               both zero points are 0).
+  //
+  // Shapes and data are taken by value so the tensor setters and the compare
+  // helpers receive non-const lvalues.
+  void RunCropCase(std::vector<int8_t> input_data, std::vector<int> input_shape, std::vector<int> output_shape,
+                   int thread_num, int axis, const std::vector<int> &offsets, std::vector<int8_t> expect_result,
+                   double output_scale = 1.0) {
+    const int output_size = static_cast<int>(expect_result.size());
+    std::vector<int8_t> output(expect_result.size(), 0);
+
+    lite::tensor::QuantArg input_quant_arg;
+    input_quant_arg.scale = 1.0;
+    input_quant_arg.zeroPoint = 0;
+    lite::tensor::QuantArg output_quant_arg;
+    output_quant_arg.scale = output_scale;
+    output_quant_arg.zeroPoint = 0;
+
+    TypeId tid_int8 = kNumberTypeInt8;
+    BorrowedTensorPtr input_tensor(new lite::tensor::Tensor);
+    input_tensor->SetData(input_data.data());
+    input_tensor->set_shape(input_shape);
+    input_tensor->AddQuantParam(input_quant_arg);
+    input_tensor->set_data_type(tid_int8);
+    std::vector<lite::tensor::Tensor *> inputs_tensor(1);
+    inputs_tensor[0] = input_tensor.get();
+
+    BorrowedTensorPtr output_tensor(new lite::tensor::Tensor);
+    output_tensor->SetData(output.data());
+    output_tensor->set_shape(output_shape);
+    output_tensor->AddQuantParam(output_quant_arg);
+    output_tensor->set_data_type(tid_int8);
+    std::vector<lite::tensor::Tensor *> outputs_tensor(1);
+    outputs_tensor[0] = output_tensor.get();
+
+    // Value-initialize so that fields the test does not set (for example the
+    // name_ the kernel creator logs when Init fails) are not indeterminate.
+    CropParameter op_param{};
+    op_param.op_parameter_.type_ = schema::PrimitiveType_Crop;
+    op_param.axis_ = axis;
+    for (size_t i = 0; i < offsets.size(); ++i) {
+      op_param.offset_[i] = offsets[i];
+    }
+    op_param.offset_size_ = static_cast<int>(offsets.size());
+    lite::Context ctx;
+    ctx.threadNum = thread_num;
+
+    kernel::KernelKey desc = {kernel::KERNEL_ARCH::kCPU, kNumberTypeInt8, schema::PrimitiveType_Crop};
+    auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc);
+    ASSERT_NE(creator, nullptr);
+    // NOTE(review): the kernel is deliberately left undeleted: op_param lives
+    // on the stack, so confirm who owns the OpParameter before adding a
+    // delete here. The status returned by Run() is also not checked.
+    kernel::LiteKernel *kernel =
+      creator(inputs_tensor, outputs_tensor, reinterpret_cast<OpParameter *>(&op_param), &ctx, desc);
+    ASSERT_NE(kernel, nullptr);
+    auto output_tensor_shape = output_tensor->shape();
+    ASSERT_EQ(output_tensor_shape, output_shape);
+    kernel->Run();
+
+    PrintData("output data", output.data(), output_size);
+    PrintData("output data shape", output_tensor_shape.data(), output_tensor_shape.size());
+    CompareOutputData(output.data(), expect_result.data(), output_size, 0.000001);
+  }
+};
+
+TEST_F(TestCropInt8, crop_1d_axis0_offset0_quant0_thread2) {
+  RunCropCase(SequentialInput(8), {8}, {7}, /*thread_num=*/2, /*axis=*/0, /*offsets=*/{1},
+              {2, 3, 4, 5, 6, 7, 8});
+}
+
+TEST_F(TestCropInt8, crop_2d_axis1_offset0_quant0_thread2) {
+  RunCropCase(SequentialInput(16), {2, 8}, {2, 7}, /*thread_num=*/2, /*axis=*/1, /*offsets=*/{1},
+              {2, 3, 4, 5, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16});
+}
+
+TEST_F(TestCropInt8, crop_3d_axis1_offset0_quant0_thread0) {
+  RunCropCase(SequentialInput(8), {2, 2, 2}, {2, 1, 1}, /*thread_num=*/1, /*axis=*/1, /*offsets=*/{1},
+              {4, 8});
+}
+
+TEST_F(TestCropInt8, crop_3d_axis1_offset0_quant0_thread2) {
+  RunCropCase(SequentialInput(32), {2, 8, 2}, {2, 7, 1}, /*thread_num=*/2, /*axis=*/1, /*offsets=*/{1},
+              {4, 6, 8, 10, 12, 14, 16, 20, 22, 24, 26, 28, 30, 32});
+}
+
+TEST_F(TestCropInt8, crop_4d_axis0_offset0_quant0_thread0) {
+  RunCropCase(SequentialInput(16), {2, 2, 2, 2}, {1, 1, 1, 1}, /*thread_num=*/1, /*axis=*/0, /*offsets=*/{1},
+              {16});
+}
+
+TEST_F(TestCropInt8, crop_4d_axis1_offset0_quant0_thread0) {
+  RunCropCase(SequentialInput(16), {2, 2, 2, 2}, {2, 1, 1, 1}, /*thread_num=*/1, /*axis=*/1, /*offsets=*/{1},
+              {8, 16});
+}
+
+// Despite "axis1" in the name, this case sets axis_ = 0 with four offsets.
+TEST_F(TestCropInt8, crop_4d_axis1_offset1_quant0_thread0) {
+  RunCropCase(SequentialInput(16), {2, 2, 2, 2}, {1, 1, 2, 2}, /*thread_num=*/1, /*axis=*/0,
+              /*offsets=*/{1, 1, 0, 0}, {13, 14, 15, 16});
+}
+
+// Despite "axis1" in the name, this case sets axis_ = 0 with four offsets.
+TEST_F(TestCropInt8, crop_4d_axis1_offset1_quant1_thread0) {
+  RunCropCase(SequentialInput(16), {2, 2, 2, 2}, {1, 1, 2, 2}, /*thread_num=*/1, /*axis=*/0,
+              /*offsets=*/{1, 1, 0, 0}, {7, 7, 8, 8}, /*output_scale=*/2.0);
+}
+
+TEST_F(TestCropInt8, crop_4d_axis0_offset0_quant0_thread2) {
+  RunCropCase(SequentialInput(64), {2, 8, 2, 2}, {1, 7, 1, 1}, /*thread_num=*/2, /*axis=*/0, /*offsets=*/{1},
+              {40, 44, 48, 52, 56, 60, 64});
+}
+
+TEST_F(TestCropInt8, crop_4d_axis0_offset0_quant0_thread3) {
+  RunCropCase(SequentialInput(64), {2, 8, 2, 2}, {1, 7, 1, 1}, /*thread_num=*/3, /*axis=*/0, /*offsets=*/{1},
+              {40, 44, 48, 52, 56, 60, 64});
+}
+}  // namespace mindspore