forked from mindspore-Ecosystem/mindspore
!7765 transpose fp16 slice calc moved to ReSize and mark run into concat int32
Merge pull request !7765 from zhaozhenlong/lite/issue/I1X1X3_face_detection_fp16_error
commit 831d3a69d4
@@ -136,6 +136,7 @@ kernel::LiteKernel *CpuStridedSliceKernelCreator(const std::vector<lite::Tensor
   return kernel;
 }
 
+REG_KERNEL(kCPU, kNumberTypeInt32, PrimitiveType_StridedSlice, CpuStridedSliceKernelCreator)
 REG_KERNEL(kCPU, kNumberTypeFloat32, PrimitiveType_StridedSlice, CpuStridedSliceKernelCreator)
 REG_KERNEL(kCPU, kNumberTypeInt8, PrimitiveType_StridedSlice, CpuStridedSliceKernelCreator)
 }  // namespace mindspore::kernel
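The StridedSlice creator was already registered for float32 and int8; the new line adds an int32 entry so int32 tensors (commonly shape or index tensors in detection models) can be scheduled at all. As a rough mental model, REG_KERNEL populates a lookup table keyed by device, data type, and op type. The sketch below is a minimal illustration under that assumption; every name in it is a hypothetical stand-in, not MindSpore's actual internals.

// Minimal sketch of a kernel registry keyed by (arch, dtype, op type).
// Hypothetical stand-ins for what a macro like REG_KERNEL expands to.
#include <map>
#include <tuple>

enum Arch { kCPU_ };
enum DType { kFloat32_, kInt8_, kInt32_ };
enum Op { kStridedSlice_ };
using Creator = int (*)();  // placeholder for the real creator signature
using Key = std::tuple<Arch, DType, Op>;

static std::map<Key, Creator> g_registry;

// One static Registrar per REG_KERNEL line runs at load time and fills
// the table, so scheduling is a plain map lookup later.
struct Registrar {
  Registrar(Arch a, DType t, Op o, Creator c) { g_registry[{a, t, o}] = c; }
};

static int DummyCreator() { return 0; }
// Before this commit there was no int32 entry, so an int32 StridedSlice
// lookup came back empty and the node could not be placed on CPU.
static Registrar reg_int32(kCPU_, kInt32_, kStridedSlice_, DummyCreator);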
@@ -31,10 +31,6 @@ using mindspore::schema::PrimitiveType_Transpose;
 
 namespace mindspore::kernel {
 int TransposeFp16CPUKernel::Init() {
-  TransposeParameter *param = reinterpret_cast<TransposeParameter *>(this->op_parameter_);
-  num_unit_ = static_cast<int>(in_tensors_[kInputIndex]->shape().at(param->perm_[kNHWC_H]));
-  thread_h_num_ = MSMIN(thread_num_, num_unit_);
-  thread_h_stride_ = UP_DIV(num_unit_, thread_h_num_);
   if (!InferShapeDone()) {
     return RET_OK;
   }
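In the removed lines, the per-thread slice of the transpose (num_unit_, thread_h_num_, thread_h_stride_) was computed before the InferShapeDone() check, i.e. potentially from shapes that had not been inferred yet. The next hunk moves that computation into ReSize(), which runs once shapes are known.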
@@ -42,9 +38,12 @@ int TransposeFp16CPUKernel::Init() {
 }
 
 int TransposeFp16CPUKernel::ReSize() {
+  TransposeParameter *param = reinterpret_cast<TransposeParameter *>(this->op_parameter_);
+  num_unit_ = static_cast<int>(in_tensors_[kInputIndex]->shape().at(param->perm_[kNHWC_H]));
+  thread_h_num_ = MSMIN(thread_num_, num_unit_);
+  thread_h_stride_ = UP_DIV(num_unit_, thread_h_num_);
   auto &in_tensor = in_tensors_.front();
   auto &out_tensor = out_tensors_.front();
-  auto param = reinterpret_cast<TransposeParameter *>(op_parameter_);
   auto in_shape = in_tensor->shape();
   auto out_shape = out_tensor->shape();
   param->strides_[param->num_axes_ - 1] = 1;
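The hunk also drops ReSize()'s old local `auto param = ...`, since the pointer is now obtained once at the top of the function. Below is a minimal sketch of the Init/ReSize contract this fix relies on, with simplified, hypothetical names (the real kernels use InferShapeDone() and RET_OK):

// Hedged sketch: shape-derived state belongs in ReSize(), which re-runs on
// every shape change; Init() must tolerate shapes that are not yet known.
struct SketchKernel {
  bool shape_known = false;  // stands in for InferShapeDone()
  int num_unit = 0;          // stands in for num_unit_/thread_h_num_/...

  int CurrentHeight() { return 4; }  // placeholder for shape().at(perm_[H])

  int ReSize() {
    num_unit = CurrentHeight();  // always reads the freshly inferred shape
    return 0;
  }
  int Init() {
    if (!shape_known) return 0;  // defer all shape-derived setup
    return ReSize();             // shapes already known: compute now
  }
};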
@@ -325,8 +325,10 @@ kernel::LiteKernel *CpuArithmeticFp32KernelCreator(const std::vector<lite::Tenso
 
 REG_KERNEL(kCPU, kNumberTypeFloat32, PrimitiveType_Mul, CpuArithmeticFp32KernelCreator)
 REG_KERNEL(kCPU, kNumberTypeInt, PrimitiveType_Mul, CpuArithmeticFp32KernelCreator)
+REG_KERNEL(kCPU, kNumberTypeInt32, PrimitiveType_Mul, CpuArithmeticFp32KernelCreator)
 REG_KERNEL(kCPU, kNumberTypeFloat32, PrimitiveType_Add, CpuArithmeticFp32KernelCreator)
 REG_KERNEL(kCPU, kNumberTypeInt, PrimitiveType_Add, CpuArithmeticFp32KernelCreator)
+REG_KERNEL(kCPU, kNumberTypeInt32, PrimitiveType_Add, CpuArithmeticFp32KernelCreator)
 REG_KERNEL(kCPU, kNumberTypeFloat32, PrimitiveType_Sub, CpuArithmeticFp32KernelCreator)
 REG_KERNEL(kCPU, kNumberTypeFloat32, PrimitiveType_Div, CpuArithmeticFp32KernelCreator)
 REG_KERNEL(kCPU, kNumberTypeFloat32, PrimitiveType_RealDiv, CpuArithmeticFp32KernelCreator)
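Note that kNumberTypeInt and kNumberTypeInt32 are distinct TypeId values, so the new Int32 lines are genuine additions rather than duplicates of the existing kNumberTypeInt registrations; converted models may tag integer tensors with either id.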
@@ -338,6 +340,8 @@ REG_KERNEL(kCPU, kNumberTypeFloat32, PrimitiveType_FloorDiv, CpuArithmeticFp32Ke
 REG_KERNEL(kCPU, kNumberTypeFloat32, PrimitiveType_FloorMod, CpuArithmeticFp32KernelCreator)
 REG_KERNEL(kCPU, kNumberTypeInt, PrimitiveType_FloorDiv, CpuArithmeticFp32KernelCreator)
 REG_KERNEL(kCPU, kNumberTypeInt, PrimitiveType_FloorMod, CpuArithmeticFp32KernelCreator)
+REG_KERNEL(kCPU, kNumberTypeInt32, PrimitiveType_FloorDiv, CpuArithmeticFp32KernelCreator)
+REG_KERNEL(kCPU, kNumberTypeInt32, PrimitiveType_FloorMod, CpuArithmeticFp32KernelCreator)
 REG_KERNEL(kCPU, kNumberTypeFloat32, PrimitiveType_SquaredDifference, CpuArithmeticFp32KernelCreator)
 REG_KERNEL(kCPU, kNumberTypeFloat32, PrimitiveType_Eltwise, CpuArithmeticFp32KernelCreator)
 
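FloorDiv and FloorMod get the same treatment. In a face-detection graph these integer element-wise ops plausibly appear in shape and anchor-index arithmetic, which would explain why missing int32 kernels surfaced as a model-level scheduling error rather than a numeric one.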
@@ -109,5 +109,6 @@ kernel::LiteKernel *CpuSliceFp32KernelCreator(const std::vector<lite::Tensor *>
   return kernel;
 }
 
+REG_KERNEL(kCPU, kNumberTypeInt32, PrimitiveType_Slice, CpuSliceFp32KernelCreator)
 REG_KERNEL(kCPU, kNumberTypeFloat32, PrimitiveType_Slice, CpuSliceFp32KernelCreator)
 }  // namespace mindspore::kernel
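Routing int32 Slice (and StridedSlice above) through the float32 creator is presumably safe because these kernels only move elements: int32 and float32 share the same 4-byte width, so the copy path is identical regardless of how the bits are interpreted.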
@@ -296,7 +296,8 @@ kernel::LiteKernel *Scheduler::ScheduleNode(const std::vector<Tensor *> &in_tens
 TypeId Scheduler::GetFirstFp32Fp16OrInt8Type(const std::vector<Tensor *> &in_tensors) {
   for (const auto &tensor : in_tensors) {
     auto dtype = tensor->data_type();
-    if (dtype == kNumberTypeFloat32 || dtype == kNumberTypeFloat16 || dtype == kNumberTypeInt8) {
+    if (dtype == kNumberTypeFloat32 || dtype == kNumberTypeFloat16 || dtype == kNumberTypeInt8 ||
+        dtype == kNumberTypeInt32) {
       return dtype;
     }
   }
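With kNumberTypeInt32 added to the accepted set, a node whose first typed input is int32 (the "concat int32" case from the commit title) now yields an int32 kernel desc instead of falling through this loop, which is what lets the scheduler reach the Int32 registrations added above.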
@@ -341,7 +342,8 @@ kernel::SubGraphType Scheduler::GetKernelSubGraphType(kernel::LiteKernel *kernel
   } else if (desc.arch == kernel::KERNEL_ARCH::kCPU) {
     if (desc.data_type == kNumberTypeFloat16) {
       return kernel::kCpuFP16SubGraph;
-    } else if (desc.data_type == kNumberTypeFloat32 || desc.data_type == kNumberTypeInt8) {
+    } else if (desc.data_type == kNumberTypeFloat32 || desc.data_type == kNumberTypeInt8 ||
+               desc.data_type == kNumberTypeInt32) {
       return kernel::kCpuFP32SubGraph;
     }
   }
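Finally, int32 kernels are folded into the FP32 subgraph so they execute alongside float32 kernels instead of being left ungrouped. A self-contained sketch of the resulting dtype-to-subgraph mapping follows; it is simplified (the real function also branches on desc.arch and other device cases) and its enum values are local stand-ins for MindSpore's:

// Simplified mapping from kernel data type to CPU subgraph type after this
// change; enum values are local stand-ins, not MindSpore's definitions.
enum SubGraphType { kCpuFP16SubGraph, kCpuFP32SubGraph, kUnknownSubGraph };
enum TypeId { kNumberTypeFloat16, kNumberTypeFloat32, kNumberTypeInt8, kNumberTypeInt32 };

SubGraphType CpuSubGraphFor(TypeId t) {
  if (t == kNumberTypeFloat16) return kCpuFP16SubGraph;
  if (t == kNumberTypeFloat32 || t == kNumberTypeInt8 || t == kNumberTypeInt32) {
    return kCpuFP32SubGraph;  // int32 kernels now join the FP32 subgraph
  }
  return kUnknownSubGraph;  // other dtypes fall through as before
}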