diff --git a/mindspore/lite/src/ops/addn.cc b/mindspore/lite/src/ops/addn.cc index c8a8e4e0a83..a6fab39cfc8 100644 --- a/mindspore/lite/src/ops/addn.cc +++ b/mindspore/lite/src/ops/addn.cc @@ -20,21 +20,28 @@ #include "src/ir/tensor.h" namespace mindspore::lite { -int AddN::InferShape(std::vector inputs_, std::vector outputs_) { +namespace { +constexpr int kLeastInputNum = 2; +} +int AddN::InferShape(std::vector inputs, std::vector outputs) { MS_ASSERT(this->primitive != nullptr); - auto input = inputs_.front(); + auto input = inputs.front(); MS_ASSERT(input != nullptr); - auto output = outputs_.front(); + auto output = outputs.front(); MS_ASSERT(output != nullptr); - if (inputs_.size() < kDoubleNum) { - MS_LOG(ERROR) << "input size is error"; + if (inputs.size() < kLeastInputNum) { + MS_LOG(ERROR) << "input size" << inputs.size() << " is error!"; return RET_INPUT_TENSOR_ERROR; } - for (int i = 1; i < inputs_.size(); ++i) { - if (inputs_.at(i)->shape() != inputs_.at(0)->shape()) { + for (int i = 1; i < inputs.size(); ++i) { + if (inputs.at(i)->shape() != inputs.at(0)->shape()) { MS_LOG(ERROR) << "AddN inputs shape is not equal!"; return RET_INPUT_TENSOR_ERROR; } + if (inputs.at(i)->data_type() != inputs.at(0)->data_type()) { + MS_LOG(ERROR) << "AddN all input data type should be the same!"; + return RET_INPUT_TENSOR_ERROR; + } } output->SetFormat(input->GetFormat()); output->set_shape(input->shape()); diff --git a/mindspore/lite/src/ops/argmax.cc b/mindspore/lite/src/ops/argmax.cc index d71910e438e..27992629c42 100644 --- a/mindspore/lite/src/ops/argmax.cc +++ b/mindspore/lite/src/ops/argmax.cc @@ -38,7 +38,11 @@ int ArgMax::InferShape(std::vector inputs_, std::vectoraxis() << ", input shape size: " << input_shape_size; return RET_PARAM_INVALID; } - output_shape.erase(output_shape.begin() + axis); + if (argmax_prim->topK() == -1) { + output_shape.erase(output_shape.begin() + axis); + } else if (argmax_prim->axisType() == 1) { + output_shape[axis] = 
argmax_prim->topK(); + } output->SetFormat(input->GetFormat()); output->set_shape(output_shape); diff --git a/mindspore/lite/src/ops/argmin.cc b/mindspore/lite/src/ops/argmin.cc index b501b14b1c9..c611d09601e 100644 --- a/mindspore/lite/src/ops/argmin.cc +++ b/mindspore/lite/src/ops/argmin.cc @@ -37,7 +37,11 @@ int ArgMin::InferShape(std::vector inputs_, std::vector output_shape(input->shape()); - output_shape.erase(output_shape.begin() + axis); + if (argmin_prim->topK() == -1) { + output_shape.erase(output_shape.begin() + axis); + } else if (argmin_prim->axisType() == 1) { + output_shape[axis] = argmin_prim->topK(); + } output->SetFormat(input->GetFormat()); output->set_shape(output_shape); diff --git a/mindspore/lite/src/populate_parameter.cc b/mindspore/lite/src/populate_parameter.cc index 7a72b38ee95..8cd47e2c3ad 100644 --- a/mindspore/lite/src/populate_parameter.cc +++ b/mindspore/lite/src/populate_parameter.cc @@ -485,7 +485,7 @@ PowerParameter *PopulatePowerParameter(const lite::Primitive *primitive) { return parameter; } -ArgMinMaxParameter *PopulateArgMinMaxParam(const lite::Primitive *primitive) { +ArgMinMaxParameter *PopulateArgMaxParam(const lite::Primitive *primitive) { ArgMinMaxParameter *parameter = new (std::nothrow) ArgMinMaxParameter(); if (parameter == nullptr) { MS_LOG(ERROR) << "new ArgMinMaxParameter failed."; @@ -501,6 +501,22 @@ ArgMinMaxParameter *PopulateArgMinMaxParam(const lite::Primitive *primitive) { return parameter; } +ArgMinMaxParameter *PopulateArgMinParam(const lite::Primitive *primitive) { + ArgMinMaxParameter *parameter = new (std::nothrow) ArgMinMaxParameter(); + if (parameter == nullptr) { + MS_LOG(ERROR) << "new ArgMinMaxParameter failed."; + return nullptr; + } + auto param = primitive->Value()->value_as_ArgMin(); + parameter->op_parameter_.type_ = primitive->Type(); + parameter->axis_ = param->axis(); + parameter->topk_ = param->topK(); + parameter->axis_type_ = param->axisType(); + parameter->out_value_ = 
param->outMaxValue(); + parameter->keep_dims_ = param->keepDims(); + return parameter; +} + CastParameter *PopulateCastParam(const lite::Primitive *primitive) { CastParameter *parameter = new (std::nothrow) CastParameter(); if (parameter == nullptr) { @@ -962,6 +978,16 @@ StridedSliceParameter *PopulateStridedSliceParam(const lite::Primitive *primitiv return parameter; } +OpParameter *PopulateAddNParam(const lite::Primitive *primitive) { + auto parameter = new (std::nothrow) OpParameter(); + if (parameter == nullptr) { + MS_LOG(ERROR) << "new OpParameter fail!"; + return nullptr; + } + parameter->type_ = primitive->Type(); + return parameter; +} + OpParameter *PopulateParameter(const lite::Primitive *primitive) { MS_EXCEPTION_IF_NULL(primitive); auto op_type = primitive->Type(); @@ -1020,8 +1046,9 @@ OpParameter *PopulateParameter(const lite::Primitive *primitive) { case schema::PrimitiveType_Floor: return reinterpret_cast(PopulateArithmeticSelf(primitive)); case schema::PrimitiveType_ArgMax: + return reinterpret_cast(PopulateArgMaxParam(primitive)); case schema::PrimitiveType_ArgMin: - return reinterpret_cast(PopulateArgMinMaxParam(primitive)); + return reinterpret_cast(PopulateArgMinParam(primitive)); case schema::PrimitiveType_Cast: return reinterpret_cast(PopulateCastParam(primitive)); case schema::PrimitiveType_Ceil: @@ -1078,6 +1105,8 @@ OpParameter *PopulateParameter(const lite::Primitive *primitive) { return reinterpret_cast(PopulateMatMulParameter(primitive)); case schema::PrimitiveType_OneHot: return reinterpret_cast(PopulateOneHotParameter(primitive)); + case schema::PrimitiveType_AddN: + return reinterpret_cast(PopulateAddNParam(primitive)); default: break; } diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/addn.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/addn.cc index a17828affd6..5784bbd0e40 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/addn.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/addn.cc @@ -17,52 +17,100 @@ #include 
"src/kernel_registry.h" #include "src/runtime/kernel/arm/fp32/arithmetic.h" #include "include/errorcode.h" +#include "src/runtime/runtime_api.h" using mindspore::kernel::KERNEL_ARCH::kCPU; using mindspore::lite::KernelRegistrar; using mindspore::lite::RET_ERROR; +using mindspore::lite::RET_NULL_PTR; using mindspore::lite::RET_OK; using mindspore::schema::PrimitiveType_AddN; namespace mindspore::kernel { namespace { -constexpr int kLeastInputNum = 2; +int AddNLaunch(int thread_id, LiteParallelGroupEnv *penv, void *cdata) { + if (cdata == nullptr) { + MS_LOG(ERROR) << "Input cdata is nullptr!"; + return RET_NULL_PTR; + } + auto kernel = reinterpret_cast(cdata); + return kernel->AddNParallelRun(thread_id); +} } -int AddNCPUKernel::Init() { return RET_OK; } +int AddNCPUKernel::Init() { + elements_num_ = inputs_[0]->ElementsNum(); + return RET_OK; +} int AddNCPUKernel::ReSize() { return RET_OK; } +int AddNCPUKernel::AddNParallelRun(int thread_id) { + int count_per_thread = UP_DIV(elements_num_, opParameter->thread_num_); + int count = MSMIN(count_per_thread, elements_num_ - thread_id * count_per_thread); + auto stride = count_per_thread * thread_id; + auto ret = ElementAdd(in1_addr_ + stride, in2_addr_ + stride, out_addr_ + stride, count); + if (ret != OPCLIB_OK) { + MS_LOG(ERROR) << "ElementAdd fail! 
ret: " << ret; + return RET_ERROR; + } + return RET_OK; +} + int AddNCPUKernel::Run() { auto input0_data = reinterpret_cast(inputs_[0]->Data()); auto input1_data = reinterpret_cast(inputs_[1]->Data()); auto output_data = reinterpret_cast(outputs_[0]->Data()); - auto element_num = inputs_[0]->ElementsNum(); - - ElementAdd(input0_data, input1_data, output_data, element_num); - for (int i = 2; i < inputs_.size(); ++i) { - ElementAdd(reinterpret_cast(inputs_[i]->Data()), output_data, output_data, element_num); + if (elements_num_ < opParameter->thread_num_) { + ElementAdd(input0_data, input1_data, output_data, elements_num_); + for (int i = 2; i < inputs_.size(); ++i) { + ElementAdd(reinterpret_cast(inputs_[i]->Data()), output_data, output_data, elements_num_); + } + return RET_OK; + } + in1_addr_ = input0_data; + in2_addr_ = input1_data; + out_addr_ = output_data; + int ret = LiteBackendParallelLaunch(AddNLaunch, this, opParameter->thread_num_); + if (ret != RET_OK) { + MS_LOG(ERROR) << "addn launch fail!ret: " << ret; + return RET_ERROR; + } + for (size_t i = 2; i < inputs_.size(); ++i) { + in1_addr_ = reinterpret_cast(inputs_[i]->Data()); + in2_addr_ = output_data; + ret = LiteBackendParallelLaunch(AddNLaunch, this, opParameter->thread_num_); + if (ret != RET_OK) { + MS_LOG(ERROR) << "addn launch fail!ret: " << ret << ", input index: " << i; + return RET_ERROR; + } } return RET_OK; } kernel::LiteKernel *CpuAddNFp32KernelCreator(const std::vector &inputs, const std::vector &outputs, - OpParameter *opParameter, const lite::Context *ctx, + OpParameter *op_parameter, const lite::Context *ctx, const kernel::KernelKey &desc) { - if (opParameter == nullptr) { - MS_LOG(ERROR) << "Input opParameter is nullptr!"; + if (op_parameter == nullptr) { + MS_LOG(ERROR) << "Input op_parameter is nullptr!"; return nullptr; } - auto *kernel = new (std::nothrow) AddNCPUKernel(opParameter, inputs, outputs); + if (ctx == nullptr) { + MS_LOG(ERROR) << "Input context is nullptr!"; + return 
nullptr; + } + + op_parameter->thread_num_ = ctx->threadNum; + auto *kernel = new (std::nothrow) AddNCPUKernel(op_parameter, inputs, outputs); if (kernel == nullptr) { MS_LOG(ERROR) << "new AddNCPUKernel fail!"; return nullptr; } auto ret = kernel->Init(); if (ret != RET_OK) { - MS_LOG(ERROR) << "Init kernel failed! name: " << opParameter->name_ << ", type: " - << schema::EnumNamePrimitiveType(static_cast(opParameter->type_)); + MS_LOG(ERROR) << "Init kernel failed! name: " << op_parameter->name_ << ", type: " + << schema::EnumNamePrimitiveType(static_cast(op_parameter->type_)); delete kernel; return nullptr; } @@ -71,4 +119,3 @@ kernel::LiteKernel *CpuAddNFp32KernelCreator(const std::vector(this->opParameter); if (no_crop_) { - BatchToSpaceNoCropForNHWC(input_data, output_data, in_shape.data(), out_shape[0], param->block_shape_); + BatchToSpaceNoCropForNHWC(input_data, output_data, in_shape.data(), out_shape[0], param->block_shape_, + sizeof(float)); } else { - BatchToSpaceForNHWC(input_data, output_data, in_shape.data(), out_shape[0], param->block_shape_, param->crops_); + BatchToSpaceForNHWC(input_data, output_data, in_shape.data(), out_shape[0], param->block_shape_, param->crops_, + sizeof(float)); } return RET_OK; @@ -61,13 +63,13 @@ int BatchToSpaceCPUKernel::Run() { kernel::LiteKernel *CpuBatchToSpaceFp32KernelCreator(const std::vector &inputs, const std::vector &outputs, - OpParameter *opParameter, const lite::Context *ctx, + OpParameter *op_parameter, const lite::Context *ctx, const kernel::KernelKey &desc) { - if (opParameter == nullptr) { - MS_LOG(ERROR) << "Input opParameter is nullptr!"; + if (op_parameter == nullptr) { + MS_LOG(ERROR) << "Input op_parameter is nullptr!"; return nullptr; } - auto *kernel = new (std::nothrow) BatchToSpaceCPUKernel(opParameter, inputs, outputs); + auto *kernel = new (std::nothrow) BatchToSpaceCPUKernel(op_parameter, inputs, outputs); if (kernel == nullptr) { MS_LOG(ERROR) << "new BatchToSpaceCPUKernel fail!"; return 
nullptr; @@ -76,8 +78,8 @@ kernel::LiteKernel *CpuBatchToSpaceFp32KernelCreator(const std::vectorInit(); if (ret != RET_OK) { delete kernel; - MS_LOG(ERROR) << "Init kernel failed, name: " << opParameter->name_ << ", type: " - << schema::EnumNamePrimitiveType(static_cast(opParameter->type_)); + MS_LOG(ERROR) << "Init kernel failed, name: " << op_parameter->name_ << ", type: " + << schema::EnumNamePrimitiveType(static_cast(op_parameter->type_)); return nullptr; } return kernel; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/broadcast_to.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/broadcast_to.cc index 072b91a5181..868a960c1e5 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/broadcast_to.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/broadcast_to.cc @@ -50,13 +50,13 @@ int BroadcastToCPUKernel::Run() { kernel::LiteKernel *CpuBroadcastToFp32KernelCreator(const std::vector &inputs, const std::vector &outputs, - OpParameter *opParameter, const lite::Context *ctx, + OpParameter *op_parameter, const lite::Context *ctx, const kernel::KernelKey &desc) { - if (opParameter == nullptr) { - MS_LOG(ERROR) << "Input opParameter is nullptr!"; + if (op_parameter == nullptr) { + MS_LOG(ERROR) << "Input op_parameter is nullptr!"; return nullptr; } - auto *kernel = new (std::nothrow) BroadcastToCPUKernel(opParameter, inputs, outputs); + auto *kernel = new (std::nothrow) BroadcastToCPUKernel(op_parameter, inputs, outputs); if (kernel == nullptr) { MS_LOG(ERROR) << "new BroadcastToCPUKernel fail!"; return nullptr; @@ -65,8 +65,8 @@ kernel::LiteKernel *CpuBroadcastToFp32KernelCreator(const std::vectorInit(); if (ret != RET_OK) { delete kernel; - MS_LOG(ERROR) << "Init kernel failed, name: " << opParameter->name_ << ", type: " - << schema::EnumNamePrimitiveType(static_cast(opParameter->type_)); + MS_LOG(ERROR) << "Init kernel failed, name: " << op_parameter->name_ << ", type: " + << schema::EnumNamePrimitiveType(static_cast(op_parameter->type_)); return nullptr; } 
return kernel; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/cast.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/cast.cc index 7c22b139015..5e8df961fd3 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/cast.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/cast.cc @@ -95,6 +95,10 @@ kernel::LiteKernel *CpuCastFp32KernelCreator(const std::vectorthreadNum == 0) { + MS_LOG(ERROR) << "context thread num is 0!"; + return nullptr; + } auto *kernel = new (std::nothrow) CastCPUKernel(opParameter, inputs, outputs, ctx); if (kernel == nullptr) { MS_LOG(ERROR) << "new CastCPUKernel fail!"; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/crop.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/crop.cc index f974e6df5d0..32abb501b9a 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/crop.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/crop.cc @@ -14,65 +14,85 @@ * limitations under the License. */ #include "src/runtime/kernel/arm/fp32/crop.h" -#include #include "schema/model_generated.h" #include "src/kernel_registry.h" #include "src/runtime/kernel/arm/opclib/fp32/crop.h" #include "include/errorcode.h" +#include "src/runtime/runtime_api.h" using mindspore::lite::KernelRegistrar; using mindspore::lite::RET_ERROR; using mindspore::lite::RET_FORMAT_ERR; +using mindspore::lite::RET_NULL_PTR; using mindspore::lite::RET_OK; using mindspore::schema::PrimitiveType_Crop; namespace mindspore::kernel { +namespace { +int CropLaunch(int thread_id, LiteParallelGroupEnv *penv, void *cdata) { + if (cdata == nullptr) { + MS_LOG(ERROR) << "Input cdata is nullptr!"; + return RET_NULL_PTR; + } + auto kernel = reinterpret_cast(cdata); + return kernel->CropParallelRun(thread_id); +} +} + int CropCPUKernel::Init() { schema::Format input0_format = inputs_[0]->GetFormat(); - if (input0_format != schema::Format_NC4HW4) { - outputs_[0]->SetFormat(input0_format); - return RET_OK; + if (input0_format != schema::Format_NCHW && input0_format != schema::Format_NHWC) { + 
MS_LOG(ERROR) << "Unsupport format " << input0_format; + return RET_FORMAT_ERR; } - convert_function_ = LayoutTransform(inputs_[0]->data_type(), inputs_[0]->GetFormat(), schema::Format_NHWC); - if (convert_function_ == nullptr) { - MS_LOG(ERROR) << "Can not convert format " << inputs_[0]->GetFormat() << " to " << schema::Format_NHWC; - return RET_ERROR; - } - auto packed_input_size = inputs_[0]->Channel() * inputs_[0]->Batch() * inputs_[0]->Height() * inputs_[0]->Width(); - packed_input_ = reinterpret_cast(malloc(packed_input_size * sizeof(float))); - if (packed_input_ == nullptr) { - MS_LOG(ERROR) << "malloc memory fail!"; - return RET_ERROR; - } - memset(packed_input_, 0, packed_input_size * sizeof(float)); + outputs_[0]->SetFormat(input0_format); return RET_OK; } -int CropCPUKernel::Run() { +int CropCPUKernel::CropParallelRun(int thread_id) { auto input = inputs_[0]; auto output = outputs_[0]; float *input_data = reinterpret_cast(input->Data()); - if (convert_function_ != nullptr) { - convert_function_(input_data, packed_input_, inputs_[0]->Batch(), inputs_[0]->Height() * inputs_[0]->Width(), - inputs_[0]->Channel()); - } else { - packed_input_ = input_data; - } float *output_data = reinterpret_cast(output->Data()); Crop4D(input_data, output_data, input->shape().data(), output->shape().data(), reinterpret_cast(opParameter)); return RET_OK; } +int CropCPUKernel::Run() { + auto input = inputs_[0]; + auto output = outputs_[0]; + auto param = reinterpret_cast(opParameter); + if (output->shape()[1] < param->op_parameter_.thread_num_) { + float *input_data = reinterpret_cast(input->Data()); + float *output_data = reinterpret_cast(output->Data()); + Crop4DNoParallel(input_data, output_data, input->shape().data(), output->shape().data(), param); + return RET_OK; + } + + int ret = LiteBackendParallelLaunch(CropLaunch, this, param->op_parameter_.thread_num_); + if (ret != RET_OK) { + MS_LOG(ERROR) << "Crop launch fail!ret: " << ret; + return RET_ERROR; + } + return 
RET_OK; +} + kernel::LiteKernel *CpuCropFp32KernelCreator(const std::vector &inputs, const std::vector &outputs, - OpParameter *opParameter, const lite::Context *ctx, + OpParameter *op_parameter, const lite::Context *ctx, const kernel::KernelKey &desc) { - if (opParameter == nullptr) { - MS_LOG(ERROR) << "Input opParameter is nullptr!"; + if (op_parameter == nullptr) { + MS_LOG(ERROR) << "Input op_parameter is nullptr!"; return nullptr; } - auto *kernel = new (std::nothrow) CropCPUKernel(opParameter, inputs, outputs); + if (ctx == nullptr) { + MS_LOG(ERROR) << "Input context is nullptr!"; + return nullptr; + } + + op_parameter->thread_num_ = ctx->threadNum; + auto *kernel = new (std::nothrow) CropCPUKernel(op_parameter, inputs, outputs); if (kernel == nullptr) { MS_LOG(ERROR) << "new CropCPUKernel fail!"; return nullptr; @@ -81,8 +101,8 @@ kernel::LiteKernel *CpuCropFp32KernelCreator(const std::vectorInit(); if (ret != RET_OK) { delete kernel; - MS_LOG(ERROR) << "Init kernel failed, name: " << opParameter->name_ << ", type: " - << schema::EnumNamePrimitiveType(static_cast(opParameter->type_)); + MS_LOG(ERROR) << "Init kernel failed, name: " << op_parameter->name_ << ", type: " + << schema::EnumNamePrimitiveType(static_cast(op_parameter->type_)); return nullptr; } return kernel; @@ -90,4 +110,3 @@ kernel::LiteKernel *CpuCropFp32KernelCreator(const std::vector #include "src/lite_kernel.h" - #include "src/runtime/kernel/arm/base/layout_transform.h" namespace mindspore::kernel { class CropCPUKernel : public LiteKernel { public: CropCPUKernel(OpParameter *parameter, const std::vector &inputs, - const std::vector &outputs) - : LiteKernel(parameter, inputs, outputs), packed_input_(nullptr), convert_function_(nullptr) {} - ~CropCPUKernel() { - if (packed_input_ != nullptr) { - free(packed_input_); - packed_input_ = nullptr; - } - } - + const std::vector &outputs) : LiteKernel(parameter, inputs, outputs) {} + ~CropCPUKernel() = default; int Init() override; int ReSize() 
override { return 0; } int Run() override; - - private: - float *packed_input_; - LayoutConvertor convert_function_; + int CropParallelRun(int thread_id); }; } // namespace mindspore::kernel - #endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_CROP_H_ - diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/slice.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/slice.cc index 805bf57f1b2..b56ab59ceee 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/slice.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/slice.cc @@ -19,13 +19,25 @@ #include "src/kernel_registry.h" #include "src/runtime/kernel/arm/opclib/fp32/slice.h" #include "include/errorcode.h" +#include "src/runtime/runtime_api.h" using mindspore::lite::KernelRegistrar; using mindspore::lite::RET_ERROR; using mindspore::lite::RET_OK; +using mindspore::lite::RET_NULL_PTR; using mindspore::schema::PrimitiveType_Slice; namespace mindspore::kernel { +namespace { +int SliceLaunch(int thread_id, LiteParallelGroupEnv *penv, void *cdata) { + if (cdata == nullptr) { + MS_LOG(ERROR) << "Input cdata is nullptr!"; + return RET_NULL_PTR; + } + auto kernel = reinterpret_cast(cdata); + return kernel->SliceParallelRun(thread_id); +} +} int SliceCPUKernel::Init() { auto *param = reinterpret_cast(opParameter); @@ -35,34 +47,68 @@ int SliceCPUKernel::Init() { << input_shape.size(); return RET_ERROR; } - if (input_shape.size() > SLICE_SHAPE_MAX_SIZE) { - MS_LOG(ERROR) << "input dimension num should <= " << SLICE_SHAPE_MAX_SIZE; + if (input_shape.size() > DIMENSION_4D) { + MS_LOG(ERROR) << "input dimension num should <= " << DIMENSION_4D; return RET_ERROR; } for (size_t i = 0; i < input_shape.size(); ++i) { param->shape_[i] = input_shape[i]; } + outputs_[0]->SetFormat(inputs_[0]->GetFormat()); + return RET_OK; +} + +int SliceCPUKernel::SliceParallelRun(int thread_id) { + const float *input_data = reinterpret_cast(inputs_[0]->Data()); + float *output_data = reinterpret_cast(outputs_[0]->Data()); + SliceParameter *param = 
reinterpret_cast(opParameter); + DoSlice(input_data, output_data, param); return RET_OK; } int SliceCPUKernel::Run() { SliceParameter *param = reinterpret_cast(opParameter); + for (int i = 0; i < param->param_length_; ++i) { + if (param->size_[i] < 0) { + param->size_[i] = param->shape_[i] - param->begin_[i]; + } + param->end_[i] = param->begin_[i] + param->size_[i]; + } + + if (param->param_length_ < DIMENSION_4D) { + PadSliceParameterTo4D(param); + } + const float *input_data = reinterpret_cast(inputs_[0]->Data()); float *output_data = reinterpret_cast(outputs_[0]->Data()); - - return DoSlice(input_data, param, output_data); + if (param->size_[1] < param->op_parameter_.thread_num_) { + DoSliceNoParallel(input_data, output_data, param); + return RET_OK; + } + int ret = LiteBackendParallelLaunch(SliceLaunch, this, param->op_parameter_.thread_num_); + if (ret != RET_OK) { + MS_LOG(ERROR) << "slice launch fail!ret: " << ret; + return RET_ERROR; + } + return RET_OK; } kernel::LiteKernel *CpuSliceFp32KernelCreator(const std::vector &inputs, const std::vector &outputs, - OpParameter *opParameter, const lite::Context *ctx, + OpParameter *op_parameter, const lite::Context *ctx, const kernel::KernelKey &desc) { - if (opParameter == nullptr) { - MS_LOG(ERROR) << "Input opParameter is nullptr!"; + if (op_parameter == nullptr) { + MS_LOG(ERROR) << "Input op_parameter is nullptr!"; return nullptr; } - auto *kernel = new (std::nothrow) SliceCPUKernel(opParameter, inputs, outputs); + if (ctx == nullptr) { + MS_LOG(ERROR) << "Input context is nullptr!"; + return nullptr; + } + + op_parameter->thread_num_ = ctx->threadNum; + auto *kernel = new (std::nothrow) SliceCPUKernel(op_parameter, inputs, outputs); if (kernel == nullptr) { MS_LOG(ERROR) << "new SliceCPUKernel fail!"; return nullptr; @@ -71,8 +117,8 @@ kernel::LiteKernel *CpuSliceFp32KernelCreator(const std::vectorInit(); if (ret != RET_OK) { delete kernel; - MS_LOG(ERROR) << "Init kernel failed, name: " << opParameter->name_ 
<< ", type: " - << schema::EnumNamePrimitiveType(static_cast(opParameter->type_)); + MS_LOG(ERROR) << "Init kernel failed, name: " << op_parameter->name_ << ", type: " + << schema::EnumNamePrimitiveType(static_cast(op_parameter->type_)); return nullptr; } return kernel; @@ -80,4 +126,3 @@ kernel::LiteKernel *CpuSliceFp32KernelCreator(const std::vector &inputs, const std::vector &outputs, - OpParameter *opParameter, const lite::Context *ctx, + OpParameter *op_parameter, const lite::Context *ctx, const kernel::KernelKey &desc) { - if (opParameter == nullptr) { - MS_LOG(ERROR) << "Input opParameter is nullptr!"; + if (op_parameter == nullptr) { + MS_LOG(ERROR) << "Input op_parameter is nullptr!"; return nullptr; } - auto *kernel = new (std::nothrow) StackCPUKernel(opParameter, inputs, outputs); + auto *kernel = new (std::nothrow) StackCPUKernel(op_parameter, inputs, outputs); if (kernel == nullptr) { MS_LOG(ERROR) << "new StackCPUKernel fail!"; return nullptr; @@ -101,8 +101,8 @@ kernel::LiteKernel *CpuStackFp32KernelCreator(const std::vectorInit(); if (ret != RET_OK) { delete kernel; - MS_LOG(ERROR) << "Init kernel failed, name: " << opParameter->name_ << ", type: " - << schema::EnumNamePrimitiveType(static_cast(opParameter->type_)); + MS_LOG(ERROR) << "Init kernel failed, name: " << op_parameter->name_ << ", type: " + << schema::EnumNamePrimitiveType(static_cast(op_parameter->type_)); return nullptr; } return kernel; diff --git a/mindspore/lite/src/runtime/kernel/arm/opclib/fp32/batch_to_space.cc b/mindspore/lite/src/runtime/kernel/arm/opclib/fp32/batch_to_space.cc index 4faccbae116..8def25c68ae 100644 --- a/mindspore/lite/src/runtime/kernel/arm/opclib/fp32/batch_to_space.cc +++ b/mindspore/lite/src/runtime/kernel/arm/opclib/fp32/batch_to_space.cc @@ -17,7 +17,8 @@ #include "src/runtime/kernel/arm/opclib/fp32/batch_to_space.h" #include "src/runtime/kernel/arm/opclib/arithmetic_common.h" -void BatchToSpaceNoCropForNHWC(const float *input, float *output, const int 
*in_shape, int out_n, const int *block) { +void BatchToSpaceNoCropForNHWC(const void *input, void *output, const int *in_shape, int out_n, const int *block, + int data_size) { int block_h = block[0]; int block_w = block[1]; int in_h = in_shape[1]; @@ -25,7 +26,7 @@ void BatchToSpaceNoCropForNHWC(const float *input, float *output, const int *in_ int in_c = in_shape[3]; size_t stride_h = block_w * out_n; size_t output_offset = 0; - size_t copy_size = in_c * 4; + size_t copy_size = in_c * data_size; size_t in_stride_h = in_w * in_c; size_t in_stride_n = in_stride_h * in_h; for (int n = 0; n < out_n; ++n) { @@ -36,8 +37,9 @@ void BatchToSpaceNoCropForNHWC(const float *input, float *output, const int *in_ size_t w_offset = w * in_c; for (int bw = 0; bw < block_w; ++bw) { size_t in_offset = in_stride_n * (bh * stride_h + bw * out_n + n) + w_offset + h_offset; - memcpy(output + output_offset, input + in_offset, copy_size); - output_offset += in_c; + memcpy(reinterpret_cast(output) + output_offset, + reinterpret_cast(input) + in_offset * data_size, copy_size); + output_offset += copy_size; } } } @@ -45,8 +47,8 @@ void BatchToSpaceNoCropForNHWC(const float *input, float *output, const int *in_ } } -void BatchToSpaceForNHWC(const float *input, float *output, const int *in_shape, int out_n, const int *block, - const int *crops) { +void BatchToSpaceForNHWC(const void *input, void *output, const int *in_shape, int out_n, const int *block, + const int *crops, int data_size) { int block_h = block[0]; int block_w = block[1]; int in_n = in_shape[0]; @@ -64,7 +66,7 @@ void BatchToSpaceForNHWC(const float *input, float *output, const int *in_shape, size_t stride_h = block_w * out_n; size_t output_offset = 0; - size_t copy_size = in_c * 4; + size_t copy_size = in_c * data_size; size_t in_stride_h = in_w * in_c; size_t in_stride_n = in_stride_h * in_h; for (int n = 0; n < out_n; ++n) { @@ -83,12 +85,12 @@ void BatchToSpaceForNHWC(const float *input, float *output, const int *in_shape, 
continue; } size_t in_offset = in_stride_n * (bh * stride_h + bw * out_n + n) + w_offset + h_offset; - memcpy(output + output_offset, input + in_offset, copy_size); - output_offset += in_c; + memcpy(reinterpret_cast(output) + output_offset, + reinterpret_cast(input) + in_offset * data_size, copy_size); + output_offset += copy_size; } } } } } } - diff --git a/mindspore/lite/src/runtime/kernel/arm/opclib/fp32/batch_to_space.h b/mindspore/lite/src/runtime/kernel/arm/opclib/fp32/batch_to_space.h index a008222474a..3ea61d488ce 100644 --- a/mindspore/lite/src/runtime/kernel/arm/opclib/fp32/batch_to_space.h +++ b/mindspore/lite/src/runtime/kernel/arm/opclib/fp32/batch_to_space.h @@ -26,8 +26,8 @@ struct BatchToSpaceParameter { int32_t crops_[BATCH_TO_SPACE_CROPS_SIZE]; }; -void BatchToSpaceNoCropForNHWC(const float *input, float *output, const int *in_shape, int out_n, const int *block); -void BatchToSpaceForNHWC(const float *input, float *output, const int *in_shape, int out_n, const int *block, - const int *crops); +void BatchToSpaceNoCropForNHWC(const void *input, void *output, const int *in_shape, int out_n, const int *block, + int data_size); +void BatchToSpaceForNHWC(const void *input, void *output, const int *in_shape, int out_n, const int *block, + const int *crops, int data_size); #endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_OPCLIB_FP32_BATCH_TO_SPACE_H_ - diff --git a/mindspore/lite/src/runtime/kernel/arm/opclib/fp32/crop.cc b/mindspore/lite/src/runtime/kernel/arm/opclib/fp32/crop.cc index 1f1e0135653..547535538e4 100644 --- a/mindspore/lite/src/runtime/kernel/arm/opclib/fp32/crop.cc +++ b/mindspore/lite/src/runtime/kernel/arm/opclib/fp32/crop.cc @@ -15,40 +15,74 @@ */ #include "src/runtime/kernel/arm/opclib/fp32/crop.h" #include +#include "src/runtime/kernel/arm/opclib/op_base.h" -void Pad4DOffset(CropParameter *crop_param) { - int64_t offset_tmp[DIMENSION_4D]; +void Pad4DOffset(CropParameter *crop_param, int64_t *offset) { int axis = crop_param->axis_; - 
for (int i = 3; i >= 0; --i) { + for (int i = DIMENSION_4D - 1; i >= 0; --i) { int offset_index = i - axis; if (offset_index >= 0) { - offset_tmp[i] = crop_param->offset_[offset_index]; + offset[i] = crop_param->offset_[offset_index]; } else { - offset_tmp[i] = 0; + offset[i] = 0; } } - for (int i = 0; i < DIMENSION_4D; ++i) { - crop_param->offset_[i] = offset_tmp[i]; - } } void Crop4D(const float *input, float *output, const int *in_shape, const int *out_shape, CropParameter *crop_param) { - Pad4DOffset(crop_param); + int64_t offset_pad[DIMENSION_4D]; + Pad4DOffset(crop_param, offset_pad); + int out_shape1 = out_shape[1]; + int out_shape2 = out_shape[2]; + int out_shape3 = out_shape[3]; + size_t out_stride2 = out_shape3; + size_t out_stride1 = out_stride2 * out_shape2; + size_t out_stride0 = out_stride1 * out_shape1; + size_t in_stride2 = in_shape[3]; + size_t in_stride1 = in_stride2 * in_shape[2]; + size_t in_stride0 = in_stride1 * in_shape[1]; + size_t copy_size = out_shape3 * sizeof(float); + size_t count_per_thread = UP_DIV(out_shape1, crop_param->op_parameter_.thread_num_); + int thread_id = crop_param->thread_id_; + size_t thread_stride = thread_id * count_per_thread; + for (int i = 0; i < out_shape[0]; ++i) { + size_t out_offset0 = i * out_stride0; + size_t in_offset0 = (i + offset_pad[0]) * in_stride0 + offset_pad[3]; + for (size_t j = 0; j < count_per_thread; ++j) { + size_t k = j + thread_stride; + if (k >= out_shape1) { + break; + } + size_t out_offset1 = k * out_stride1 + out_offset0; + size_t in_offset1 = (k + offset_pad[1]) * in_stride1 + in_offset0; + for (int l = 0; l < out_shape2; ++l) { + size_t out_offset = l * out_stride2 + out_offset1; + size_t in_offset = (l + offset_pad[2]) * in_stride2 + in_offset1; + memcpy(output + out_offset, input + in_offset, copy_size); + } + } + } +} + +void Crop4DNoParallel(const float *input, float *output, const int *in_shape, const int *out_shape, + CropParameter *crop_param) { + int64_t offset_pad[DIMENSION_4D]; 
+ Pad4DOffset(crop_param, offset_pad); size_t in_dim2_stride = in_shape[3]; size_t in_dim1_stride = in_shape[2] * in_dim2_stride; size_t in_dim0_stride = in_dim1_stride * in_shape[1]; - size_t offset_3 = crop_param->offset_[3]; + size_t offset_3 = offset_pad[3]; size_t out_offset = 0; size_t copy_num = out_shape[3]; size_t copy_size = copy_num * sizeof(float); - size_t in_dim0_end = crop_param->offset_[0] + out_shape[0]; - size_t in_dim1_end = crop_param->offset_[1] + out_shape[1]; - size_t in_dim2_end = crop_param->offset_[2] + out_shape[2]; - for (int i = crop_param->offset_[0]; i < in_dim0_end; ++i) { + size_t in_dim0_end = offset_pad[0] + out_shape[0]; + size_t in_dim1_end = offset_pad[1] + out_shape[1]; + size_t in_dim2_end = offset_pad[2] + out_shape[2]; + for (int i = offset_pad[0]; i < in_dim0_end; ++i) { size_t dim0_offset = i * in_dim0_stride + offset_3; - for (int j = crop_param->offset_[1]; j < in_dim1_end; ++j) { + for (int j = offset_pad[1]; j < in_dim1_end; ++j) { size_t dim1_offset = j * in_dim1_stride + dim0_offset; - for (int k = crop_param->offset_[2]; k < in_dim2_end; ++k) { + for (int k = offset_pad[2]; k < in_dim2_end; ++k) { size_t in_offset = dim1_offset + k * in_dim2_stride; memcpy(output + out_offset, input + in_offset, copy_size); out_offset += copy_num; diff --git a/mindspore/lite/src/runtime/kernel/arm/opclib/fp32/crop.h b/mindspore/lite/src/runtime/kernel/arm/opclib/fp32/crop.h index 3d61355e6cd..45cf2d934ee 100644 --- a/mindspore/lite/src/runtime/kernel/arm/opclib/fp32/crop.h +++ b/mindspore/lite/src/runtime/kernel/arm/opclib/fp32/crop.h @@ -23,8 +23,11 @@ struct CropParameter { OpParameter op_parameter_; int64_t offset_[CROP_OFFSET_MAX_SIZE]; int64_t axis_; + int32_t thread_id_; }; void Crop4D(const float *input, float *output, const int *in_shape, const int *out_shape, CropParameter *crop_param); +void Crop4DNoParallel(const float *input, float *output, const int *in_shape, const int *out_shape, + CropParameter *crop_param); #endif 
// MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_OPCLIB_FP32_CROP_H_ diff --git a/mindspore/lite/src/runtime/kernel/arm/opclib/fp32/slice.cc b/mindspore/lite/src/runtime/kernel/arm/opclib/fp32/slice.cc index 4161b648c9c..37850298200 100644 --- a/mindspore/lite/src/runtime/kernel/arm/opclib/fp32/slice.cc +++ b/mindspore/lite/src/runtime/kernel/arm/opclib/fp32/slice.cc @@ -15,7 +15,9 @@ */ #include "src/runtime/kernel/arm/opclib/fp32/slice.h" +#include <string.h> #include "src/runtime/kernel/arm/opclib/op_base.h" +#include "src/runtime/kernel/arm/opclib/errorcode.h" void PadSliceParameterTo4D(SliceParameter *param) { int32_t begin[DIMENSION_4D]; @@ -25,7 +27,7 @@ void PadSliceParameterTo4D(SliceParameter *param) { for (int32_t i = 0; i < param->param_length_; ++i) { begin[i] = param->begin_[i]; end[i] = param->end_[i]; - slice_size[i] = param->size_[i]; + slice_size[i] = param->size_[i] < 0 ? param->shape_[i] - begin[i] : param->size_[i]; data_shape[i] = param->shape_[i]; } int32_t real_index = param->param_length_ - 1; @@ -45,36 +47,54 @@ void PadSliceParameterTo4D(SliceParameter *param) { param->param_length_ = DIMENSION_4D; } -int DoSlice(const float *input, SliceParameter *param, float *output) { - if (param->param_length_ > DIMENSION_4D) { - return -1; - } - - for (int i = 0; i < param->param_length_; ++i) { - if (param->size_[i] < 0) { - param->size_[i] = param->shape_[i] - param->begin_[i]; - } - param->end_[i] = param->begin_[i] + param->size_[i]; - } - - if (param->param_length_ < DIMENSION_4D) { - PadSliceParameterTo4D(param); - } - size_t dim_offset[DIMENSION_4D - 1]; - dim_offset[2] = param->shape_[3]; - dim_offset[1] = dim_offset[2] * param->shape_[2]; - dim_offset[0] = dim_offset[1] * param->shape_[1]; - size_t output_index = 0; - for (int32_t dim0 = param->begin_[0]; dim0 < param->end_[0]; ++dim0) { - for (int32_t dim1 = param->begin_[1]; dim1 < param->end_[1]; ++dim1) { - for (int32_t dim2 = param->begin_[2]; dim2 < param->end_[2]; ++dim2) { - for (int32_t dim3 = 
param->begin_[3]; dim3 < param->end_[3]; ++dim3) { - output[output_index++] = *(input + dim0 * dim_offset[0] - + dim1 * dim_offset[1] + dim2 * dim_offset[2] + dim3); - } +void DoSlice(const float *input, float *output, SliceParameter *param) { + int32_t out_dim1 = param->size_[1]; + int32_t out_dim2 = param->size_[2]; + int32_t out_dim3 = param->size_[3]; + size_t out_stride2 = out_dim3; + size_t out_stride1 = out_stride2 * out_dim2; + size_t out_stride0 = out_stride1 * out_dim1; + size_t count_per_thread = UP_DIV(out_dim1, param->op_parameter_.thread_num_); + int thread_id = param->thread_id_; + size_t thread_stride = thread_id * count_per_thread; + size_t copy_size = param->size_[3] * sizeof(float); + size_t in_stride2 = param->shape_[3]; + size_t in_stride1 = param->shape_[2] * in_stride2; + size_t in_stride0 = param->shape_[1] * in_stride1; + for (int i = 0; i < param->size_[0]; ++i) { + size_t out_offset0 = i * out_stride0; + size_t in_offset0 = (i + param->begin_[0]) * in_stride0 + param->begin_[3]; + for (size_t j = 0; j < count_per_thread; ++j) { + size_t k = j + thread_stride; + if (k >= out_dim1) { + break; + } + size_t out_offset1 = k * out_stride1 + out_offset0; + size_t in_offset1 = (k + param->begin_[1]) * in_stride1 + in_offset0; + for (int l = 0; l < out_dim2; ++l) { + size_t out_offset = out_offset1 + l * out_stride2; + size_t in_offset = in_offset1 + (l + param->begin_[2]) * in_stride2; + memcpy(output + out_offset, input + in_offset, copy_size); } } } - return 0; } +void DoSliceNoParallel(const float *input, float *output, SliceParameter *param) { + size_t copy_size = param->size_[3] * sizeof(float); + size_t in_stride2 = param->shape_[3]; + size_t in_stride1 = param->shape_[2] * in_stride2; + size_t in_stride0 = param->shape_[1] * in_stride1; + size_t out_offset = 0; + for (int32_t dim0 = param->begin_[0]; dim0 < param->end_[0]; ++dim0) { + size_t in_offset0 = dim0 * in_stride0 + param->begin_[3]; + for (size_t dim1 = param->begin_[1]; dim1 < 
param->end_[1]; ++dim1) { + size_t in_offset1 = dim1 * in_stride1 + in_offset0; + for (int32_t dim2 = param->begin_[2]; dim2 < param->end_[2]; ++dim2) { + size_t in_offset = in_offset1 + dim2 * in_stride2; + memcpy(output + out_offset, input + in_offset, copy_size); + out_offset += param->size_[3]; + } + } + } +} diff --git a/mindspore/lite/src/runtime/kernel/arm/opclib/fp32/slice.h b/mindspore/lite/src/runtime/kernel/arm/opclib/fp32/slice.h index 8873101fcb2..9545cfb8ba5 100644 --- a/mindspore/lite/src/runtime/kernel/arm/opclib/fp32/slice.h +++ b/mindspore/lite/src/runtime/kernel/arm/opclib/fp32/slice.h @@ -26,9 +26,11 @@ struct SliceParameter { int32_t size_[SLICE_SHAPE_MAX_SIZE]; int32_t shape_[SLICE_SHAPE_MAX_SIZE]; int32_t param_length_; + int32_t thread_id_; }; -int DoSlice(const float *input, SliceParameter *param, float *output); - +void PadSliceParameterTo4D(SliceParameter *param); +void DoSlice(const float *input, float *output, SliceParameter *param); +void DoSliceNoParallel(const float *input, float *output, SliceParameter *param); #endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_OPCLIB_FP32_SLICE_H_ diff --git a/mindspore/lite/tools/converter/parser/tflite/tflite_argmax_parser.cc b/mindspore/lite/tools/converter/parser/tflite/tflite_argmax_parser.cc index abef7bac380..aa93318eea1 100644 --- a/mindspore/lite/tools/converter/parser/tflite/tflite_argmax_parser.cc +++ b/mindspore/lite/tools/converter/parser/tflite/tflite_argmax_parser.cc @@ -29,6 +29,11 @@ STATUS TfliteArgmaxParser::Parse(const std::unique_ptr<tflite::OperatorT> &tflit bool quantizedModel) { MS_LOG(DEBUG) << "parse TfliteArgmaxParser"; std::unique_ptr<schema::ArgMaxT> attr(new schema::ArgMaxT()); + // These are caffe attributes, set to default value. + attr->axisType = 1; + attr->outMaxValue = false; + attr->topK = -1; + attr->keepDims = false; if (op != nullptr) { op->primitive = std::make_unique<schema::PrimitiveT>();