forked from mindspore-Ecosystem/mindspore
!30256 [MS][LITE] fix npu op bugs
Merge pull request !30256 from XianglongZeng/myms_new
This commit is contained in:
commit
31d4c91ef1
|
@ -96,8 +96,8 @@ void AssistDataNHWC2NCHW(int *data, size_t unit_size);
|
|||
int MaskDataNHWC2NCHW(int mask);
|
||||
|
||||
template <typename T>
|
||||
ge::Operator *GetNPUConst(const uint8_t *const_data, const std::vector<int64_t> &shape, const ge::DataType data_type,
|
||||
std::string name = "const", bool is_expand_4d = false) {
|
||||
hiai::op::Const *GetNPUConst(const uint8_t *const_data, const std::vector<int64_t> &shape, const ge::DataType data_type,
|
||||
std::string name = "const", bool is_expand_4d = false) {
|
||||
MS_CHECK_TRUE_MSG(const_data != nullptr, nullptr, "Const data can not be nullptr.");
|
||||
int element_num = 1;
|
||||
if (!shape.empty()) {
|
||||
|
|
|
@ -186,6 +186,7 @@ Status NPUDelegate::Init() {
|
|||
{schema::PrimitiveType_Transpose, GetNPUOp<TransposeNPUOp>},
|
||||
{schema::PrimitiveType_Unsqueeze, GetNPUOp<UnsqueezeNPUOp>},
|
||||
{schema::PrimitiveType_Abs, GetNPUOp<AbsNPUOp>},
|
||||
{schema::PrimitiveType_Flatten, GetNPUOp<FlattenNPUOp>},
|
||||
};
|
||||
return mindspore::kSuccess;
|
||||
}
|
||||
|
|
|
@ -16,7 +16,11 @@
|
|||
|
||||
#include "src/delegate/npu/op/avg_pooling_npu.h"
|
||||
#include "src/delegate/npu/npu_converter_utils.h"
|
||||
#include "src/delegate/npu/npu_manager.h"
|
||||
|
||||
namespace mindspore {
|
||||
constexpr int MAX_HW_SIZE = 65534;
|
||||
|
||||
int AvgPoolingNPUOp::IsSupport(const schema::Primitive *primitive, const std::vector<mindspore::MSTensor> &in_tensors,
|
||||
const std::vector<mindspore::MSTensor> &out_tensors) {
|
||||
auto pooling_prim = primitive->value_as_AvgPoolFusion();
|
||||
|
@ -32,6 +36,13 @@ int AvgPoolingNPUOp::IsSupport(const schema::Primitive *primitive, const std::ve
|
|||
MS_LOG(WARNING) << "Npu pooling does not support pad > stride.";
|
||||
return RET_NOT_SUPPORT;
|
||||
}
|
||||
auto input_shape = in_tensors.front().Shape();
|
||||
auto height = input_shape.at(NHWC_H);
|
||||
auto width = input_shape.at(NHWC_W);
|
||||
if (!NPUManager::CheckDDKVerGreatEqual("100.330.011.032") && height * width > MAX_HW_SIZE) {
|
||||
MS_LOG(WARNING) << "The pooling size of " << name_ << " exceeds the max size that NPU support.";
|
||||
return RET_NOT_SUPPORT;
|
||||
}
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
|
|
|
@ -58,6 +58,11 @@ int ConvolutionBaseNPUOp::InitWeightConst(const std::vector<mindspore::MSTensor>
|
|||
|
||||
if (inputs[1].DataType() == DataType::kNumberTypeFloat16) {
|
||||
#ifdef ENABLE_ARM64
|
||||
nchw_weight_ = reinterpret_cast<float *>(malloc(inputs[1].ElementNum() * sizeof(float)));
|
||||
if (nchw_weight_ == nullptr) {
|
||||
MS_LOG(ERROR) << "Malloc buffer failed.";
|
||||
return RET_ERROR;
|
||||
}
|
||||
fp32_weight_ = reinterpret_cast<float *>(malloc(inputs[1].ElementNum() * sizeof(float)));
|
||||
if (fp32_weight_ == nullptr) {
|
||||
MS_LOG(ERROR) << "Malloc buffer failed.";
|
||||
|
|
|
@ -17,6 +17,7 @@
|
|||
#include "src/delegate/npu/op/flatten_npu.h"
|
||||
#include "include/graph/op/all_ops.h"
|
||||
#include "src/delegate/npu/npu_converter_utils.h"
|
||||
#include "src/delegate/npu/npu_manager.h"
|
||||
|
||||
namespace mindspore {
|
||||
int FlattenNPUOp::IsSupport(const schema::Primitive *primitive, const std::vector<mindspore::MSTensor> &in_tensors,
|
||||
|
@ -25,15 +26,24 @@ int FlattenNPUOp::IsSupport(const schema::Primitive *primitive, const std::vecto
|
|||
MS_LOG(WARNING) << "The output tensor can only be flatten to 2 dimension.";
|
||||
return RET_NOT_SUPPORT;
|
||||
}
|
||||
use_reshape_ = !NPUManager::CheckDDKVerGreatEqual("100.330.011.032");
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
int FlattenNPUOp::Init(const schema::Primitive *primitive, const std::vector<mindspore::MSTensor> &in_tensors,
|
||||
const std::vector<mindspore::MSTensor> &out_tensors) {
|
||||
flatten_ = new (std::nothrow) hiai::op::Flatten(name_);
|
||||
if (flatten_ == nullptr) {
|
||||
MS_LOG(ERROR) << name_ << " op is nullptr";
|
||||
return RET_ERROR;
|
||||
if (use_reshape_) {
|
||||
reshape_ = new (std::nothrow) hiai::op::Reshape(name_ + "_reshape");
|
||||
if (reshape_ == nullptr) {
|
||||
MS_LOG(ERROR) << "New Reshape operator for op " << name_ << " failed.";
|
||||
return RET_ERROR;
|
||||
}
|
||||
} else {
|
||||
flatten_ = new (std::nothrow) hiai::op::Flatten(name_);
|
||||
if (flatten_ == nullptr) {
|
||||
MS_LOG(ERROR) << "New Flatten operator for op " << name_ << " failed.";
|
||||
return RET_ERROR;
|
||||
}
|
||||
}
|
||||
return RET_OK;
|
||||
}
|
||||
|
@ -41,16 +51,47 @@ int FlattenNPUOp::Init(const schema::Primitive *primitive, const std::vector<min
|
|||
int FlattenNPUOp::SetNPUInputs(const std::vector<mindspore::MSTensor> &in_tensors,
|
||||
const std::vector<mindspore::MSTensor> &out_tensors,
|
||||
const std::vector<ge::Operator *> &npu_inputs) {
|
||||
flatten_->set_input_x(*npu_inputs[0]);
|
||||
if (use_reshape_) {
|
||||
auto output_shape = out_tensors.front().Shape();
|
||||
int64_t dims = output_shape.size();
|
||||
std::vector<int> valid_shape;
|
||||
for (int i = 0; i < dims; i++) {
|
||||
valid_shape.emplace_back(static_cast<int>(output_shape.at(i)));
|
||||
}
|
||||
auto valid_data_ptr = reinterpret_cast<const uint8_t *>(valid_shape.data());
|
||||
shape_ = GetNPUConst<int>(valid_data_ptr, {dims}, ge::DT_INT32, name_ + "_shape");
|
||||
if (shape_ == nullptr) {
|
||||
MS_LOG(ERROR) << "Get NPU Const for Reshape failed.";
|
||||
return RET_ERROR;
|
||||
}
|
||||
reshape_->set_input_x(*npu_inputs[0]);
|
||||
reshape_->set_input_shape(*shape_);
|
||||
} else {
|
||||
flatten_->set_input_x(*npu_inputs[0]);
|
||||
}
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
ge::Operator *FlattenNPUOp::GetNPUOp() { return this->flatten_; }
|
||||
ge::Operator *FlattenNPUOp::GetNPUOp() {
|
||||
if (use_reshape_) {
|
||||
return this->reshape_;
|
||||
} else {
|
||||
return this->flatten_;
|
||||
}
|
||||
}
|
||||
|
||||
FlattenNPUOp::~FlattenNPUOp() {
|
||||
if (flatten_ != nullptr) {
|
||||
delete flatten_;
|
||||
flatten_ = nullptr;
|
||||
}
|
||||
if (reshape_ != nullptr) {
|
||||
delete reshape_;
|
||||
reshape_ = nullptr;
|
||||
}
|
||||
if (shape_ != nullptr) {
|
||||
delete shape_;
|
||||
shape_ = nullptr;
|
||||
}
|
||||
}
|
||||
} // namespace mindspore
|
||||
|
|
|
@ -43,6 +43,9 @@ class FlattenNPUOp : public NPUOp {
|
|||
|
||||
private:
|
||||
hiai::op::Flatten *flatten_ = nullptr;
|
||||
hiai::op::Reshape *reshape_ = nullptr;
|
||||
hiai::op::Const *shape_ = nullptr;
|
||||
bool use_reshape_ = false;
|
||||
};
|
||||
} // namespace mindspore
|
||||
#endif // MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_NPU_OP_FLATTEN_NPU_H_
|
||||
|
|
|
@ -32,8 +32,11 @@ int SplitNPUOp::Init(const schema::Primitive *primitive, const std::vector<minds
|
|||
return RET_ERROR;
|
||||
}
|
||||
|
||||
axis_ = static_cast<int>(split_prim->axis());
|
||||
auto split_dim = in_tensors.at(0).Shape().at(axis_);
|
||||
auto in_tensor = in_tensors.at(0);
|
||||
auto axis = static_cast<int>(split_prim->axis());
|
||||
axis_ = axis >= 0 ? axis : axis + static_cast<int>(in_tensor.Shape().size());
|
||||
MS_CHECK_TRUE_MSG(axis_ >= 0, RET_ERROR, "The split axis is illegal!");
|
||||
auto split_dim = in_tensor.Shape().at(axis_);
|
||||
auto sizes_split = split_prim->size_splits();
|
||||
int size = split_prim->output_num();
|
||||
std::vector<int> sizes_split_vec;
|
||||
|
|
|
@ -27,7 +27,8 @@ namespace mindspore {
|
|||
std::set<mindspore::schema::PrimitiveType> insert_nodes = {
|
||||
schema::PrimitiveType_Concat, schema::PrimitiveType_AddFusion, schema::PrimitiveType_Eltwise,
|
||||
schema::PrimitiveType_Activation, schema::PrimitiveType_Split, schema::PrimitiveType_PadFusion,
|
||||
schema::PrimitiveType_StridedSlice, schema::PrimitiveType_MulFusion, schema::PrimitiveType_DivFusion};
|
||||
schema::PrimitiveType_StridedSlice, schema::PrimitiveType_MulFusion, schema::PrimitiveType_DivFusion,
|
||||
schema::PrimitiveType_Cast};
|
||||
|
||||
// this pass goal is to minimize subgraphs generated
|
||||
// by inserting nchw2nhwc or nhwc2nchw before or after the operator (e.g. concat, add, etc..) together with
|
||||
|
|
|
@ -14,8 +14,8 @@
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_SCATTER_ND_H_
|
||||
#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_SCATTER_ND_H_
|
||||
#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_BASE_SCATTER_ND_BASE_H_
|
||||
#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_BASE_SCATTER_ND_BASE_H_
|
||||
|
||||
#include <vector>
|
||||
#include "src/inner_kernel.h"
|
||||
|
@ -43,4 +43,4 @@ class ScatterNDCPUKernel : public InnerKernel {
|
|||
};
|
||||
} // namespace mindspore::kernel
|
||||
|
||||
#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_SCATTER_ND_H_
|
||||
#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_BASE_SCATTER_ND_BASE_H_
|
||||
|
|
|
@ -45,7 +45,8 @@ squeezenet1.0-9.onnx;1:data_0
|
|||
residual_distill_cifar10_bs_1.onnx;1:actual_input
|
||||
residual_distill_cifar10_bs_32.onnx;1:actual_input
|
||||
residual_distill_bs_1.onnx;1:actual_input
|
||||
residual_distill_bs_32.onnx;1:actual_input
|
||||
#residual_distill_bs_32.onnx has random precision error in p50
|
||||
residual_distill_bs_32.onnx;1:actual_input 200
|
||||
crnn_lite_lstm_v2.onnx;1:input;32,32,32,1
|
||||
psenet_lite_mbv2.onnx;1:input;1,32,32,3
|
||||
residual_distill_res34_cifar10_bs_1_update.onnx;1:actual_input
|
||||
|
|
Loading…
Reference in New Issue