!7765 transpose fp16 slice calc moved to ReSize and mark run into concat int32

Merge pull request !7765 from zhaozhenlong/lite/issue/I1X1X3_face_detection_fp16_error
mindspore-ci-bot 2020-10-27 23:19:19 +08:00 committed by Gitee
commit 831d3a69d4
5 changed files with 14 additions and 7 deletions

Changed file 1 of 5:

@@ -136,6 +136,7 @@ kernel::LiteKernel *CpuStridedSliceKernelCreator(const std::vector<lite::Tensor
   return kernel;
 }
+REG_KERNEL(kCPU, kNumberTypeInt32, PrimitiveType_StridedSlice, CpuStridedSliceKernelCreator)
 REG_KERNEL(kCPU, kNumberTypeFloat32, PrimitiveType_StridedSlice, CpuStridedSliceKernelCreator)
 REG_KERNEL(kCPU, kNumberTypeInt8, PrimitiveType_StridedSlice, CpuStridedSliceKernelCreator)
 }  // namespace mindspore::kernel
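
Note: REG_KERNEL registers a kernel creator under an (architecture, data type, primitive type) key, so an int32 StridedSlice only resolves to a CPU kernel once an entry exists for kNumberTypeInt32; the one-line additions in this PR reuse the existing fp32 creators for the int32 path. A minimal sketch of that kind of keyed registry follows, assuming a map-based store; KernelKey, KernelRegistry and the factory signature are invented for illustration and are not MindSpore Lite's actual implementation.

    #include <functional>
    #include <iostream>
    #include <map>
    #include <string>
    #include <tuple>
    #include <utility>

    enum Arch { kCPU };
    enum DataType { kNumberTypeFloat32, kNumberTypeInt8, kNumberTypeInt32 };
    enum OpType { PrimitiveType_StridedSlice };

    struct KernelKey {
      Arch arch;
      DataType data_type;
      OpType op;
      bool operator<(const KernelKey &o) const {
        return std::tie(arch, data_type, op) < std::tie(o.arch, o.data_type, o.op);
      }
    };

    using Creator = std::function<std::string()>;  // stands in for a LiteKernel factory

    struct KernelRegistry {
      std::map<KernelKey, Creator> creators;
      void Reg(Arch a, DataType t, OpType op, Creator c) { creators[{a, t, op}] = std::move(c); }
      bool Has(Arch a, DataType t, OpType op) const { return creators.count({a, t, op}) != 0; }
    };

    int main() {
      KernelRegistry reg;
      // Before this patch, only the fp32 and int8 StridedSlice creators were registered.
      reg.Reg(kCPU, kNumberTypeFloat32, PrimitiveType_StridedSlice, [] { return std::string("fp32 kernel"); });
      reg.Reg(kCPU, kNumberTypeInt8, PrimitiveType_StridedSlice, [] { return std::string("int8 kernel"); });
      std::cout << reg.Has(kCPU, kNumberTypeInt32, PrimitiveType_StridedSlice) << "\n";  // 0: int32 lookup fails
      // The patch adds an int32 entry that reuses the same creator as fp32.
      reg.Reg(kCPU, kNumberTypeInt32, PrimitiveType_StridedSlice, [] { return std::string("fp32 kernel"); });
      std::cout << reg.Has(kCPU, kNumberTypeInt32, PrimitiveType_StridedSlice) << "\n";  // 1: resolves
      return 0;
    }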

Changed file 2 of 5:

@@ -31,10 +31,6 @@ using mindspore::schema::PrimitiveType_Transpose;
 namespace mindspore::kernel {
 int TransposeFp16CPUKernel::Init() {
-  TransposeParameter *param = reinterpret_cast<TransposeParameter *>(this->op_parameter_);
-  num_unit_ = static_cast<int>(in_tensors_[kInputIndex]->shape().at(param->perm_[kNHWC_H]));
-  thread_h_num_ = MSMIN(thread_num_, num_unit_);
-  thread_h_stride_ = UP_DIV(num_unit_, thread_h_num_);
   if (!InferShapeDone()) {
     return RET_OK;
   }
@@ -42,9 +38,12 @@ int TransposeFp16CPUKernel::Init() {
 }
 int TransposeFp16CPUKernel::ReSize() {
+  TransposeParameter *param = reinterpret_cast<TransposeParameter *>(this->op_parameter_);
+  num_unit_ = static_cast<int>(in_tensors_[kInputIndex]->shape().at(param->perm_[kNHWC_H]));
+  thread_h_num_ = MSMIN(thread_num_, num_unit_);
+  thread_h_stride_ = UP_DIV(num_unit_, thread_h_num_);
   auto &in_tensor = in_tensors_.front();
   auto &out_tensor = out_tensors_.front();
-  auto param = reinterpret_cast<TransposeParameter *>(op_parameter_);
   auto in_shape = in_tensor->shape();
   auto out_shape = out_tensor->shape();
   param->strides_[param->num_axes_ - 1] = 1;
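
Note: the four moved lines depend on the input tensor's shape. In the old Init() they ran before the InferShapeDone() early return, so they could read a shape that had not been inferred yet or that later changed, which is presumably what the fp16 face-detection issue hit; ReSize() is re-run whenever shapes are (re)inferred, so the per-thread slicing is always derived from the current shape. A minimal sketch of that Init/ReSize split follows; the class and member names (ShapeDependentKernel, input_shape_) are invented for illustration, only the pattern mirrors the real kernel.

    #include <algorithm>
    #include <iostream>
    #include <vector>

    struct ShapeDependentKernel {
      std::vector<int> input_shape_;  // may still be empty (not inferred) at Init time
      int thread_num_ = 4;
      int num_unit_ = 0;
      int thread_h_num_ = 0;
      int thread_h_stride_ = 0;

      bool InferShapeDone() const { return !input_shape_.empty(); }

      // Init runs once at kernel creation; if shapes are not known yet it must not read them.
      int Init() {
        if (!InferShapeDone()) {
          return 0;
        }
        return ReSize();
      }

      // ReSize runs whenever shapes are (re)inferred, so shape-derived values are computed here.
      int ReSize() {
        num_unit_ = input_shape_[1];  // e.g. slice along the H axis
        thread_h_num_ = std::min(thread_num_, num_unit_);
        thread_h_stride_ = (num_unit_ + thread_h_num_ - 1) / thread_h_num_;  // same as UP_DIV
        return 0;
      }
    };

    int main() {
      ShapeDependentKernel k;
      k.Init();                        // shape unknown: nothing shape-dependent is computed
      k.input_shape_ = {1, 96, 96, 3};
      k.ReSize();                      // shape known now: slicing recomputed from the real shape
      std::cout << k.num_unit_ << " " << k.thread_h_num_ << " " << k.thread_h_stride_ << "\n";  // 96 4 24
    }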

Changed file 3 of 5:

@@ -325,8 +325,10 @@ kernel::LiteKernel *CpuArithmeticFp32KernelCreator(const std::vector<lite::Tenso
 REG_KERNEL(kCPU, kNumberTypeFloat32, PrimitiveType_Mul, CpuArithmeticFp32KernelCreator)
 REG_KERNEL(kCPU, kNumberTypeInt, PrimitiveType_Mul, CpuArithmeticFp32KernelCreator)
+REG_KERNEL(kCPU, kNumberTypeInt32, PrimitiveType_Mul, CpuArithmeticFp32KernelCreator)
 REG_KERNEL(kCPU, kNumberTypeFloat32, PrimitiveType_Add, CpuArithmeticFp32KernelCreator)
 REG_KERNEL(kCPU, kNumberTypeInt, PrimitiveType_Add, CpuArithmeticFp32KernelCreator)
+REG_KERNEL(kCPU, kNumberTypeInt32, PrimitiveType_Add, CpuArithmeticFp32KernelCreator)
 REG_KERNEL(kCPU, kNumberTypeFloat32, PrimitiveType_Sub, CpuArithmeticFp32KernelCreator)
 REG_KERNEL(kCPU, kNumberTypeFloat32, PrimitiveType_Div, CpuArithmeticFp32KernelCreator)
 REG_KERNEL(kCPU, kNumberTypeFloat32, PrimitiveType_RealDiv, CpuArithmeticFp32KernelCreator)
@@ -338,6 +340,8 @@ REG_KERNEL(kCPU, kNumberTypeFloat32, PrimitiveType_FloorDiv, CpuArithmeticFp32Ke
 REG_KERNEL(kCPU, kNumberTypeFloat32, PrimitiveType_FloorMod, CpuArithmeticFp32KernelCreator)
 REG_KERNEL(kCPU, kNumberTypeInt, PrimitiveType_FloorDiv, CpuArithmeticFp32KernelCreator)
 REG_KERNEL(kCPU, kNumberTypeInt, PrimitiveType_FloorMod, CpuArithmeticFp32KernelCreator)
+REG_KERNEL(kCPU, kNumberTypeInt32, PrimitiveType_FloorDiv, CpuArithmeticFp32KernelCreator)
+REG_KERNEL(kCPU, kNumberTypeInt32, PrimitiveType_FloorMod, CpuArithmeticFp32KernelCreator)
 REG_KERNEL(kCPU, kNumberTypeFloat32, PrimitiveType_SquaredDifference, CpuArithmeticFp32KernelCreator)
 REG_KERNEL(kCPU, kNumberTypeFloat32, PrimitiveType_Eltwise, CpuArithmeticFp32KernelCreator)

Changed file 4 of 5:

@@ -109,5 +109,6 @@ kernel::LiteKernel *CpuSliceFp32KernelCreator(const std::vector<lite::Tensor *>
   return kernel;
 }
+REG_KERNEL(kCPU, kNumberTypeInt32, PrimitiveType_Slice, CpuSliceFp32KernelCreator)
 REG_KERNEL(kCPU, kNumberTypeFloat32, PrimitiveType_Slice, CpuSliceFp32KernelCreator)
 }  // namespace mindspore::kernel

Changed file 5 of 5:

@@ -296,7 +296,8 @@ kernel::LiteKernel *Scheduler::ScheduleNode(const std::vector<Tensor *> &in_tens
 TypeId Scheduler::GetFirstFp32Fp16OrInt8Type(const std::vector<Tensor *> &in_tensors) {
   for (const auto &tensor : in_tensors) {
     auto dtype = tensor->data_type();
-    if (dtype == kNumberTypeFloat32 || dtype == kNumberTypeFloat16 || dtype == kNumberTypeInt8) {
+    if (dtype == kNumberTypeFloat32 || dtype == kNumberTypeFloat16 || dtype == kNumberTypeInt8 ||
+        dtype == kNumberTypeInt32) {
       return dtype;
     }
   }
@@ -341,7 +342,8 @@ kernel::SubGraphType Scheduler::GetKernelSubGraphType(kernel::LiteKernel *kernel
   } else if (desc.arch == kernel::KERNEL_ARCH::kCPU) {
     if (desc.data_type == kNumberTypeFloat16) {
       return kernel::kCpuFP16SubGraph;
-    } else if (desc.data_type == kNumberTypeFloat32 || desc.data_type == kNumberTypeInt8) {
+    } else if (desc.data_type == kNumberTypeFloat32 || desc.data_type == kNumberTypeInt8 ||
+               desc.data_type == kNumberTypeInt32) {
       return kernel::kCpuFP32SubGraph;
     }
   }
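
Note: registering the int32 kernels is only half of the fix; the scheduler also has to accept kNumberTypeInt32 when it picks a node's representative data type and when it assigns CPU kernels to subgraphs, so int32 kernels land in kCpuFP32SubGraph next to the fp32/int8 ones, which is presumably what the title's int32-into-concat case needed. The sketch below restates the two predicates from the diff as free functions over a plain enum; it is an illustration of the logic, not the Scheduler's real interface, and the kTypeUnknown fallback is invented because the diff does not show the real function's no-match return value.

    #include <iostream>
    #include <vector>

    enum TypeId { kTypeUnknown, kNumberTypeFloat32, kNumberTypeFloat16, kNumberTypeInt8, kNumberTypeInt32 };
    enum SubGraphType { kNotSubGraph, kCpuFP32SubGraph, kCpuFP16SubGraph };

    // Mirrors GetFirstFp32Fp16OrInt8Type after the patch: int32 now counts as a schedulable type.
    TypeId FirstSchedulableType(const std::vector<TypeId> &in_types) {
      for (auto t : in_types) {
        if (t == kNumberTypeFloat32 || t == kNumberTypeFloat16 || t == kNumberTypeInt8 ||
            t == kNumberTypeInt32) {
          return t;
        }
      }
      return kTypeUnknown;  // invented fallback for this sketch
    }

    // Mirrors the CPU branch of GetKernelSubGraphType: int32 joins the fp32 subgraph.
    SubGraphType CpuSubGraphFor(TypeId t) {
      if (t == kNumberTypeFloat16) {
        return kCpuFP16SubGraph;
      }
      if (t == kNumberTypeFloat32 || t == kNumberTypeInt8 || t == kNumberTypeInt32) {
        return kCpuFP32SubGraph;
      }
      return kNotSubGraph;
    }

    int main() {
      std::cout << FirstSchedulableType({kNumberTypeInt32}) << "\n";                 // 4: int32 is accepted now
      std::cout << (CpuSubGraphFor(kNumberTypeInt32) == kCpuFP32SubGraph) << "\n";   // 1
    }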