From 2153320b2fe908ca233594133edf97c235f267e7 Mon Sep 17 00:00:00 2001 From: zhengyuanhua Date: Mon, 4 Jul 2022 13:01:03 +0800 Subject: [PATCH] support ascend cloud infer --- .../ccsrc/transform/graph_ir/op_adapter.cc | 2 +- .../core/load_mindir/anf_model_parser.cc | 4 + mindspore/lite/CMakeLists.txt | 7 + mindspore/lite/cmake/lite_dependences.cmake | 9 +- mindspore/lite/src/CMakeLists.txt | 8 +- mindspore/lite/src/extendrt/CMakeLists.txt | 4 + .../src/extendrt/kernel/ascend/CMakeLists.txt | 26 + .../kernel/ascend/custom_ascend_kernel.cc | 242 +++++++ .../kernel/ascend/custom_ascend_kernel.h | 68 ++ .../kernel/ascend/model/acl_env_guard.cc | 60 ++ .../kernel/ascend/model/acl_env_guard.h | 42 ++ .../kernel/ascend/model/dyn_shape_process.cc | 179 +++++ .../kernel/ascend/model/dyn_shape_process.h | 48 ++ .../kernel/ascend/model/model_infer.cc | 170 +++++ .../kernel/ascend/model/model_infer.h | 65 ++ .../kernel/ascend/model/model_process.cc | 642 ++++++++++++++++++ .../kernel/ascend/model/model_process.h | 104 +++ .../kernel/ascend/options/acl_model_options.h | 39 ++ .../ascend/options/acl_options_parser.cc | 80 +++ .../ascend/options/acl_options_parser.h | 41 ++ .../lite/src/extendrt/single_op_session.cc | 39 +- .../lite/src/extendrt/single_op_session.h | 1 + .../src/extendrt/utils/kernel_build_utils.cc | 56 +- .../src/extendrt/utils/kernel_build_utils.h | 2 + .../adapter/acl/src/acl_model_process.cc | 4 + .../adapter/acl/src/acl_pass_impl.cc | 8 + .../format/delete_redundant_transpose.cc | 3 + 27 files changed, 1939 insertions(+), 14 deletions(-) create mode 100644 mindspore/lite/src/extendrt/kernel/ascend/CMakeLists.txt create mode 100644 mindspore/lite/src/extendrt/kernel/ascend/custom_ascend_kernel.cc create mode 100644 mindspore/lite/src/extendrt/kernel/ascend/custom_ascend_kernel.h create mode 100644 mindspore/lite/src/extendrt/kernel/ascend/model/acl_env_guard.cc create mode 100644 mindspore/lite/src/extendrt/kernel/ascend/model/acl_env_guard.h create mode 
100644 mindspore/lite/src/extendrt/kernel/ascend/model/dyn_shape_process.cc create mode 100644 mindspore/lite/src/extendrt/kernel/ascend/model/dyn_shape_process.h create mode 100644 mindspore/lite/src/extendrt/kernel/ascend/model/model_infer.cc create mode 100644 mindspore/lite/src/extendrt/kernel/ascend/model/model_infer.h create mode 100644 mindspore/lite/src/extendrt/kernel/ascend/model/model_process.cc create mode 100644 mindspore/lite/src/extendrt/kernel/ascend/model/model_process.h create mode 100644 mindspore/lite/src/extendrt/kernel/ascend/options/acl_model_options.h create mode 100644 mindspore/lite/src/extendrt/kernel/ascend/options/acl_options_parser.cc create mode 100644 mindspore/lite/src/extendrt/kernel/ascend/options/acl_options_parser.h diff --git a/mindspore/ccsrc/transform/graph_ir/op_adapter.cc b/mindspore/ccsrc/transform/graph_ir/op_adapter.cc index 234f36130cc..31513b8d51d 100644 --- a/mindspore/ccsrc/transform/graph_ir/op_adapter.cc +++ b/mindspore/ccsrc/transform/graph_ir/op_adapter.cc @@ -84,7 +84,7 @@ std::string OpAdapterImpl::GetCustomOpType(const PrimitivePtr &prim) const { MS_EXCEPTION_IF_NULL(prim); auto value = prim->GetAttr("reg_op_name"); if (value == nullptr) { - MS_LOG(ERROR) << "Custom op has no func_type attr."; + MS_LOG(ERROR) << "Custom op has no reg_op_name attr."; return ""; } auto op_type = GetValue(value); diff --git a/mindspore/core/load_mindir/anf_model_parser.cc b/mindspore/core/load_mindir/anf_model_parser.cc index 39c4c920308..cb2a97c14ae 100644 --- a/mindspore/core/load_mindir/anf_model_parser.cc +++ b/mindspore/core/load_mindir/anf_model_parser.cc @@ -1441,6 +1441,10 @@ bool MSANFModelParser::BuildAttrForFuncGraph(const FuncGraphPtr &outputFuncGraph outputFuncGraph->set_attr(attr_proto.name(), ParseAttrInSingleScalar_int32_t_bool(attr_proto)); break; } + case mind_ir::AttributeProto_AttributeType_INT32: { + outputFuncGraph->set_attr(attr_proto.name(), ParseAttrInSingleScalar_int32_t_int32_t(attr_proto)); + break; + 
} default: MS_LOG(ERROR) << "Obtain attr for graph has not support input type: " << attr_type << "!"; return false; diff --git a/mindspore/lite/CMakeLists.txt b/mindspore/lite/CMakeLists.txt index 3fbf3b89a14..04ae6a278bf 100644 --- a/mindspore/lite/CMakeLists.txt +++ b/mindspore/lite/CMakeLists.txt @@ -202,6 +202,13 @@ if(DEFINED ENV{MSLITE_ENABLE_CLOUD_FUSION_INFERENCE}) set(MSLITE_ENABLE_CLOUD_FUSION_INFERENCE $ENV{MSLITE_ENABLE_CLOUD_FUSION_INFERENCE}) endif() +if(MSLITE_ENABLE_ACL AND MSLITE_ENABLE_CLOUD_FUSION_INFERENCE) + set(PLATFORM_ARM64 off) + set(PLATFORM_ARM32 off) + set(MSLITE_ENABLE_FP16 off) + set(ENABLE_NEON off) +endif() + if(MACHINE_LINUX_ARM64) add_compile_definitions(MACHINE_LINUX_ARM64) add_compile_definitions(LINUX_RUNTIME) diff --git a/mindspore/lite/cmake/lite_dependences.cmake b/mindspore/lite/cmake/lite_dependences.cmake index f9ac1933ccb..7ae49413eb2 100644 --- a/mindspore/lite/cmake/lite_dependences.cmake +++ b/mindspore/lite/cmake/lite_dependences.cmake @@ -31,7 +31,13 @@ if(MSLITE_DEPS_OPENCV) endif() if(MSLITE_DEPS_MKLDNN) - include(${TOP_DIR}/cmake/external_libs/mkl_dnn.cmake) + if(CMAKE_SYSTEM_NAME MATCHES "Linux") + set(USE_MS_THREADPOOL_FOR_DNNL ON) + endif() + if(USE_MS_THREADPOOL_FOR_DNNL) + add_compile_definitions(USE_MS_THREADPOOL_FOR_DNNL) + endif() +include(${TOP_DIR}/cmake/external_libs/mkl_dnn.cmake) endif() if(MSLITE_DEPS_LIBEVENT) @@ -47,6 +53,7 @@ if(MSLITE_DEPS_PYBIND11) include_directories(${Python3_NumPy_INCLUDE_DIRS}) include_directories(${TOP_DIR}) include_directories(${CORE_DIR}) + set(PYBIND11_CPP_STANDARD -std=c++17) include(${TOP_DIR}/cmake/external_libs/pybind11.cmake) endif() endif() diff --git a/mindspore/lite/src/CMakeLists.txt b/mindspore/lite/src/CMakeLists.txt index 679383b93bf..014725cac0c 100644 --- a/mindspore/lite/src/CMakeLists.txt +++ b/mindspore/lite/src/CMakeLists.txt @@ -421,9 +421,13 @@ add_subdirectory(runtime/kernel/cpu) add_library(lite_src_mid OBJECT ${LITE_SRC}) 
add_dependencies(lite_src_mid fbs_src) -if(MSLITE_ENABLE_ACL AND NOT MSLITE_ENABLE_CLOUD_FUSION_INFERENCE) +if(MSLITE_ENABLE_ACL) include_directories(${TOP_DIR}/graphengine/inc/external) - add_subdirectory(runtime/kernel/ascend) + if(NOT MSLITE_ENABLE_CLOUD_FUSION_INFERENCE) + add_subdirectory(runtime/kernel/ascend) + else() + add_compile_definitions(ENABLE_CLOUD_FUSION_INFERENCE) + endif() link_directories(${ASCEND_CANN_RUNTIME_PATH} ${ASCEND_TOOLKIT_RUNTIME_PATH}) endif() diff --git a/mindspore/lite/src/extendrt/CMakeLists.txt b/mindspore/lite/src/extendrt/CMakeLists.txt index 6ada5c1fd2e..35fa2138cab 100644 --- a/mindspore/lite/src/extendrt/CMakeLists.txt +++ b/mindspore/lite/src/extendrt/CMakeLists.txt @@ -13,6 +13,8 @@ if(MSLITE_ENABLE_CLOUD_FUSION_INFERENCE) add_compile_definitions(USE_GLOG) string(REPLACE "-fno-rtti" "" CMAKE_C_FLAGS ${CMAKE_C_FLAGS}) string(REPLACE "-fno-rtti" "" CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS}) + string(REPLACE "-fno-exceptions" "" CMAKE_C_FLAGS ${CMAKE_C_FLAGS}) + string(REPLACE "-fno-exceptions" "" CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS}) add_compile_definitions(ENABLE_CLOUD_FUSION_INFERENCE) remove_definitions(-DBUILD_LITE_INFERENCE) set(MINDIR_MODEL_SRC @@ -165,7 +167,9 @@ if(MSLITE_ENABLE_CLOUD_FUSION_INFERENCE) if(MSLITE_ENABLE_ACL) include_directories(${TOP_DIR}/graphengine/inc/external) + add_subdirectory(kernel/ascend) link_directories(${ASCEND_CANN_RUNTIME_PATH} ${ASCEND_TOOLKIT_RUNTIME_PATH}) + target_link_libraries(mindspore-extendrt ascend_kernel_mid) endif() if(SUPPORT_CUDA) diff --git a/mindspore/lite/src/extendrt/kernel/ascend/CMakeLists.txt b/mindspore/lite/src/extendrt/kernel/ascend/CMakeLists.txt new file mode 100644 index 00000000000..d4fe7e74bec --- /dev/null +++ b/mindspore/lite/src/extendrt/kernel/ascend/CMakeLists.txt @@ -0,0 +1,26 @@ +include_directories(${TOP_DIR}/graphengine/inc/external) +include_directories(${TOP_DIR}/mindspore) +include_directories(${TOP_DIR}/mindspore/lite/src) + +find_library(ge_graph 
libgraph.so ${ASCEND_CANN_RUNTIME_PATH} ${ASCEND_TOOLKIT_RUNTIME_PATH}) + +file(GLOB_RECURSE ASCEND_SRC ${CMAKE_CURRENT_SOURCE_DIR} + "custom_ascend_kernel.cc" + "model/*.cc" + ) + +add_library(ascend_kernel_mid OBJECT ${ASCEND_SRC}) + +add_dependencies(ascend_kernel_mid fbs_inner_src) +if("${MSLITE_REGISTRY_DEVICE}" STREQUAL "SD3403" AND PLATFORM_ARM64) + find_library(ge_graph libgraph.so ${ASCEND_CANN_RUNTIME_PATH} ${ASCEND_TOOLKIT_RUNTIME_PATH}) + find_library(acl libascendcl.so ${ASCEND_CANN_RUNTIME_PATH} ${ASCEND_TOOLKIT_RUNTIME_PATH}) + find_library(acl_retr libacl_retr.so ${ASCEND_CANN_RUNTIME_PATH} ${ASCEND_TOOLKIT_RUNTIME_PATH}) + find_library(acl_cblas libacl_cblas.so ${ASCEND_CANN_RUNTIME_PATH} ${ASCEND_TOOLKIT_RUNTIME_PATH}) + find_library(acl_runtime libruntime.so ${ASCEND_CANN_RUNTIME_PATH} ${ASCEND_TOOLKIT_RUNTIME_PATH}) + target_link_libraries(ascend_kernel_mid ${ge_graph} ${acl} ${acl_retr} ${acl_cblas} ${acl_runtime}) +else() + target_link_libraries(ascend_kernel_mid ${ge_graph} ${ge_compiler} + ${acl_retr} ${acl_cblas} ${acl_dvpp} ${acl_runtime} ${libplatform} + ${libcompress} ${libopskernel} ${libaicore_utils} ${libaicpu_engine_common} ${acl}) +endif() \ No newline at end of file diff --git a/mindspore/lite/src/extendrt/kernel/ascend/custom_ascend_kernel.cc b/mindspore/lite/src/extendrt/kernel/ascend/custom_ascend_kernel.cc new file mode 100644 index 00000000000..4cc6fcd8a6c --- /dev/null +++ b/mindspore/lite/src/extendrt/kernel/ascend/custom_ascend_kernel.cc @@ -0,0 +1,242 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "extendrt/kernel/ascend/custom_ascend_kernel.h" +#include +#include "include/registry/register_kernel.h" +#include "include/api/types.h" +#include "include/api/data_type.h" +#include "extendrt/kernel/ascend/model/model_infer.h" +#include "extendrt/kernel/ascend/options/acl_options_parser.h" +#include "core/ops/custom.h" +#include "plugin/factory/ms_factory.h" +#include "src/common/log_util.h" +#include "common/log_adapter.h" + +namespace mindspore::kernel { +namespace acl { +CustomAscendKernelMod::CustomAscendKernelMod() + : load_model_(false), acl_options_(nullptr), dyn_shape_proc_(nullptr), model_infer_(nullptr), input_data_idx_(0) {} + +CustomAscendKernelMod::~CustomAscendKernelMod() { + if (load_model_) { + int ret = model_infer_->Finalize(); + if (ret != lite::RET_OK) { + MS_LOG(ERROR) << "Model finalize failed."; + } + } +} + +void CustomAscendKernelMod::RecordInputDataIndex(const std::vector &inputs) { + for (size_t idx = 0; idx < inputs.size(); ++idx) { + if (inputs[idx] == nullptr) { + MS_LOG(ERROR) << "Input " << idx << " is invalid."; + return; + } + if (inputs[idx]->GetData() == nullptr) { + input_data_idx_ = idx; + break; + } + } +} + +bool CustomAscendKernelMod::InitParam(const std::vector &inputs, + const std::vector &outputs) { + if (inputs.empty() || outputs.empty()) { + MS_LOG(ERROR) << "Custom kernel has empty inputs or outputs, which is invalid."; + return false; + } + inputs_.assign(inputs.begin(), inputs.end() - 1); + outputs_.assign(outputs.begin(), outputs.end()); + acl_options_ = std::make_shared(); + if 
(acl_options_ == nullptr) { + MS_LOG(ERROR) << "Create AclModelOptions failed."; + return false; + } + // AclOptionsParser parser; + // if (parser.ParseAclOptions(context_, &acl_options_) != lite::RET_OK) { + // MS_LOG(ERROR) << "Parse model options failed."; + // return false; + // } + // last input is om data tensor + int idx = inputs.size() - 1; + if (inputs[idx] == nullptr || inputs[idx]->GetData() == nullptr) { + MS_LOG(ERROR) << "Input " << idx << " is invalid."; + return false; + } + Buffer om_data(inputs[idx]->GetData()->addr, inputs[idx]->GetData()->size); + model_infer_ = std::make_shared(om_data, acl_options_); + if (model_infer_ == nullptr) { + MS_LOG(ERROR) << "Create ModelInfer failed."; + return false; + } + RecordInputDataIndex(inputs); + dyn_shape_proc_ = std::make_shared(acl_options_, input_data_idx_); + if (dyn_shape_proc_ == nullptr) { + MS_LOG(ERROR) << "Create DynShapeProcess failed."; + return false; + } + return true; +} + +bool CustomAscendKernelMod::Init(const BaseOperatorPtr &base_operator, const std::vector &inputs, + const std::vector &outputs) { + if (load_model_) { + MS_LOG(INFO) << "Om has been loaded in custom kernel."; + return lite::RET_OK; + } + + auto kernel_ptr = std::dynamic_pointer_cast(base_operator); + if (!kernel_ptr) { + MS_LOG(ERROR) << "Cast Custom ops failed!"; + return false; + } + if (!InitParam(inputs, outputs)) { + MS_LOG(ERROR) << "Init param failed."; + return false; + } + if (LoadModel() != lite::RET_OK) { + MS_LOG(ERROR) << "Load model failed."; + return false; + } + + load_model_ = true; + return true; +} + +int CustomAscendKernelMod::LoadModel() { + int ret = model_infer_->Init(); + if (ret != lite::RET_OK) { + MS_LOG(ERROR) << "Model infer init failed."; + return lite::RET_ERROR; + } + ret = model_infer_->Load(); + if (ret != lite::RET_OK) { + MS_LOG(ERROR) << "Load om data failed."; + return lite::RET_ERROR; + } + acl_options_->batch_size = model_infer_->GetDynamicBatch(); + acl_options_->image_size = 
model_infer_->GetDynamicImage(); + + MS_LOG(INFO) << "Load om data success."; + return lite::RET_OK; +} + +int CustomAscendKernelMod::Resize(const BaseOperatorPtr &base_operator, const std::vector &inputs, + const std::vector &outputs, + const std::map &inputsOnHost) { + if (!load_model_) { + MS_LOG(WARNING) << "Model has not been loaded, start to load when resize."; + if (!Init(base_operator, inputs, outputs)) { + MS_LOG(ERROR) << "Load model failed when resize."; + return lite::RET_ERROR; + } + } + return lite::RET_OK; +} + +int CustomAscendKernelMod::SetInputAndOutputAddr(const std::vector &inputs, + const std::vector &outputs) { + if ((inputs_.size() + 1) != inputs.size()) { + MS_LOG(ERROR) << "Size of inputs in init [" << (inputs_.size() + 1) << "] and " + << "size of inputs in launch [" << inputs.size() << "] are not equal."; + return lite::RET_ERROR; + } + if (outputs_.size() != outputs.size()) { + MS_LOG(ERROR) << "Size of outputs in init (" << outputs_.size() << ") and " + << "size of outputs in launch (" << outputs.size() << ") are not equal."; + return lite::RET_ERROR; + } + for (size_t i = 0; i < inputs_.size(); ++i) { + if (inputs[i]->addr == nullptr || inputs[i]->size == 0) { + MS_LOG(ERROR) << "Input " << i << " addr is invalid."; + return lite::RET_ERROR; + } + inputs_[i]->SetData(inputs[i]); + } + for (size_t j = 0; j < outputs_.size(); ++j) { + if (outputs[j]->addr == nullptr || inputs[j]->size == 0) { + MS_LOG(ERROR) << "Output " << j << " addr is invalid."; + return lite::RET_ERROR; + } + outputs_[j]->SetData(outputs[j]); + } + return lite::RET_OK; +} + +bool CustomAscendKernelMod::Launch(const std::vector &inputs, const std::vector &workspace, + const std::vector &outputs, void *stream_ptr) { + if (!load_model_) { + MS_LOG(ERROR) << "Init custom ascend kernel has been not ready."; + return false; + } + if (SetInputAndOutputAddr(inputs, outputs) != lite::RET_OK) { + MS_LOG(ERROR) << "Check input and output param failed."; + return false; + } + 
if (dyn_shape_proc_->ProcDynamicInput(&inputs_) != lite::RET_OK) { + MS_LOG(ERROR) << "Proc dynamic batch size input failed."; + return false; + } + if (model_infer_->Inference(inputs_, outputs_) != lite::RET_OK) { + MS_LOG(ERROR) << "Custom kernel execute failed."; + return false; + } + return true; +} + +// std::shared_ptr CustomCreateKernel(const std::vector &inputs, +// const std::vector &outputs, +// const schema::Primitive *primitive, const mindspore::Context *ctx) +// { +// if (primitive == nullptr) { +// MS_LOG(ERROR) << "Primitive is nullptr."; +// return nullptr; +// } +// if (primitive->value_type() != schema::PrimitiveType_Custom) { +// MS_LOG(ERROR) << "Primitive type is not PrimitiveType_Custom"; +// return nullptr; +// } +// +// auto kernel = std::make_shared(inputs, outputs, primitive, ctx); +// if (kernel == nullptr) { +// MS_LOG(ERROR) << "New custom kernel is nullptr"; +// return nullptr; +// } +// return kernel; +// } + +MS_KERNEL_FACTORY_REG(KernelMod, CustomAscend, CustomAscendKernelMod); +} // namespace acl +} // namespace mindspore::kernel +namespace mindspore { +namespace registry { +namespace { +const auto kFloat32 = DataType::kNumberTypeFloat32; +const auto kFloat16 = DataType::kNumberTypeFloat16; +const auto kInt32 = DataType::kNumberTypeInt32; +const auto kInt8 = DataType::kNumberTypeInt8; +const auto kUInt8 = DataType::kNumberTypeUInt8; +const auto kBool = DataType::kNumberTypeBool; +} // namespace +// REGISTER_CUSTOM_KERNEL(ASCEND, ACL, kFloat32, ACL, kernel::acl::CustomCreateKernel) +// REGISTER_CUSTOM_KERNEL(ASCEND, ACL, kFloat16, ACL, kernel::acl::CustomCreateKernel) +// REGISTER_CUSTOM_KERNEL(ASCEND, ACL, kInt32, ACL, kernel::acl::CustomCreateKernel) +// REGISTER_CUSTOM_KERNEL(ASCEND, ACL, kInt8, ACL, kernel::acl::CustomCreateKernel) +// REGISTER_CUSTOM_KERNEL(ASCEND, ACL, kUInt8, ACL, kernel::acl::CustomCreateKernel) +// REGISTER_CUSTOM_KERNEL(ASCEND, ACL, kBool, ACL, kernel::acl::CustomCreateKernel) +} // namespace registry +} 
// namespace mindspore
diff --git a/mindspore/lite/src/extendrt/kernel/ascend/custom_ascend_kernel.h b/mindspore/lite/src/extendrt/kernel/ascend/custom_ascend_kernel.h
new file mode 100644
index 00000000000..76ec7283eed
--- /dev/null
+++ b/mindspore/lite/src/extendrt/kernel/ascend/custom_ascend_kernel.h
@@ -0,0 +1,68 @@
+/**
+ * Copyright 2021 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef MINDSPORE_LITE_SRC_EXTENDRT_KERNEL_ASCEND_CUSTOM_ASCEND_KERNEL_H_
+#define MINDSPORE_LITE_SRC_EXTENDRT_KERNEL_ASCEND_CUSTOM_ASCEND_KERNEL_H_
+
+#include  // NOTE(review): header name missing here and on the next three lines - presumably lost in extraction
+#include
+#include
+#include
+#include "extendrt/kernel/ascend/options/acl_model_options.h"
+#include "extendrt/kernel/ascend/model/model_infer.h"
+#include "extendrt/kernel/ascend/model/dyn_shape_process.h"
+#include "include/api/types.h"
+#include "include/api/context.h"
+#include "kernel/kernel.h"
+#include "kernel/common_utils.h"
+#include "include/errorcode.h"
+
+namespace mindspore::kernel {
+namespace acl {
+class CustomAscendKernelMod : public kernel::KernelMod {  // KernelMod that loads an om model and runs it via ModelInfer
+ public:
+  CustomAscendKernelMod();
+  ~CustomAscendKernelMod() override;  // finalizes the model if it was loaded
+
+  bool Init(const BaseOperatorPtr &base_operator, const std::vector &inputs,
+            const std::vector &outputs) override;  // builds the helpers and loads the om model (last input)
+
+  int Resize(
+    const BaseOperatorPtr &base_operator, const std::vector &inputs,
+    const std::vector &outputs,
+    const std::map &inputsOnHost = std::map()) override;  // calls Init when the model is not loaded yet
+
+  bool Launch(const std::vector &inputs, const std::vector &workspace,
+              const std::vector &outputs, void *stream_ptr) override;  // binds addresses and runs inference
+
+ private:
+  void RecordInputDataIndex(const std::vector &inputs);  // records the first input whose GetData() is null
+  bool InitParam(const std::vector &inputs, const std::vector &outputs);  // caches tensors, creates helpers
+  int SetInputAndOutputAddr(const std::vector &inputs, const std::vector &outputs);  // attaches launch addresses
+  int LoadModel();  // model_infer_ Init + Load, then copies the dynamic batch/image sizes into acl_options_
+
+  bool load_model_;  // true once Init has loaded the model
+  std::vector inputs_;  // kernel inputs without the trailing om data tensor
+  std::vector outputs_;  // kernel outputs
+  AclModelOptionsPtr acl_options_;  // options shared with model_infer_ and dyn_shape_proc_
+  DynShapeProcPtr dyn_shape_proc_;  // adds the dynamic batch/image-size input before inference
+  ModelInferPtr model_infer_;  // owns the loaded model
+  size_t input_data_idx_;  // index written by RecordInputDataIndex
+};
+}  // namespace acl
+}  // namespace mindspore::kernel
+
+#endif  // MINDSPORE_LITE_SRC_EXTENDRT_KERNEL_ASCEND_CUSTOM_ASCEND_KERNEL_H_
diff --git a/mindspore/lite/src/extendrt/kernel/ascend/model/acl_env_guard.cc b/mindspore/lite/src/extendrt/kernel/ascend/model/acl_env_guard.cc
new file mode 100644
index 00000000000..ba5d5bd7825
--- /dev/null
+++ b/mindspore/lite/src/extendrt/kernel/ascend/model/acl_env_guard.cc
@@ -0,0 +1,60 @@
+/**
+ * Copyright 2021 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "extendrt/kernel/ascend/model/acl_env_guard.h"
+#include "common/log_adapter.h"
+#include "acl/acl.h"
+
+namespace mindspore::kernel {
+namespace acl {
+std::shared_ptr AclEnvGuard::global_acl_env_ = nullptr;  // guard instance cached by GetAclEnv
+std::mutex AclEnvGuard::global_acl_env_mutex_;  // serializes GetAclEnv
+
+AclEnvGuard::AclEnvGuard(std::string_view cfg_file) {  // calls aclInit and keeps its result in errno_
+  errno_ = aclInit(cfg_file.data());  // NOTE(review): string_view::data() need not be NUL-terminated - confirm callers
+  if (errno_ != ACL_ERROR_NONE && errno_ != ACL_ERROR_REPEAT_INITIALIZE) {  // a repeated init is not treated as failure
+    MS_LOG(ERROR) << "Execute aclInit Failed";
+    return;
+  }
+  MS_LOG(INFO) << "Acl init success";
+}
+
+AclEnvGuard::~AclEnvGuard() { (void)aclFinalize(); }  // runs even when aclInit failed; the result is discarded
+
+std::shared_ptr AclEnvGuard::GetAclEnv(std::string_view cfg_file) {  // returns the cached guard or creates it
+  std::shared_ptr acl_env;
+
+  std::lock_guard lock(global_acl_env_mutex_);
+  acl_env = global_acl_env_;
+  if (acl_env != nullptr) {
+    MS_LOG(INFO) << "Acl has been initialized, skip.";
+    if (!cfg_file.empty()) {  // a guard already exists, so this config file is not applied
+      MS_LOG(WARNING) << "Dump config file option " << cfg_file << " is ignored.";
+    }
+  } else {
+    acl_env = std::make_shared(cfg_file);
+    aclError ret = acl_env->GetErrno();
+    if (ret != ACL_ERROR_NONE && ret != ACL_ERROR_REPEAT_INITIALIZE) {
+      MS_LOG(ERROR) << "Execute aclInit Failed";
+      return nullptr;  // the failed guard is not cached, so a later call tries again
+    }
+    global_acl_env_ = acl_env;
+    MS_LOG(INFO) << "Acl init success";
+  }
+  return acl_env;
+}
+}  // namespace acl
+}  // namespace mindspore::kernel
diff --git a/mindspore/lite/src/extendrt/kernel/ascend/model/acl_env_guard.h b/mindspore/lite/src/extendrt/kernel/ascend/model/acl_env_guard.h
new file mode 100644
index 00000000000..4b6c1fcb15d
--- /dev/null
+++ b/mindspore/lite/src/extendrt/kernel/ascend/model/acl_env_guard.h
@@ -0,0 +1,42 @@
+/**
+ * Copyright 2021 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef MINDSPORE_LITE_SRC_EXTENDRT_KERNEL_ASCEND_MODEL_ACL_ENV_GUARD_H_
+#define MINDSPORE_LITE_SRC_EXTENDRT_KERNEL_ASCEND_MODEL_ACL_ENV_GUARD_H_
+
+#include  // NOTE(review): header name missing here and on the next line - presumably lost in extraction
+#include
+#include "acl/acl_base.h"
+
+namespace mindspore::kernel {
+namespace acl {
+class AclEnvGuard {  // holds the aclError result of initialising ACL; shared through GetAclEnv
+ public:
+  explicit AclEnvGuard(std::string_view cfg_file);  // cfg_file is the ACL config path passed on construction
+  ~AclEnvGuard();
+  aclError GetErrno() const { return errno_; }  // the stored aclError value
+  static std::shared_ptr GetAclEnv(std::string_view cfg_file);  // accessor for the shared guard instance
+
+ private:
+  static std::shared_ptr global_acl_env_;  // the shared guard instance
+  static std::mutex global_acl_env_mutex_;  // mutex associated with global_acl_env_
+
+  aclError errno_;  // value returned by GetErrno
+};
+}  // namespace acl
+}  // namespace mindspore::kernel
+
+#endif  // MINDSPORE_LITE_SRC_EXTENDRT_KERNEL_ASCEND_MODEL_ACL_ENV_GUARD_H_
diff --git a/mindspore/lite/src/extendrt/kernel/ascend/model/dyn_shape_process.cc b/mindspore/lite/src/extendrt/kernel/ascend/model/dyn_shape_process.cc
new file mode 100644
index 00000000000..3852d9587da
--- /dev/null
+++ b/mindspore/lite/src/extendrt/kernel/ascend/model/dyn_shape_process.cc
@@ -0,0 +1,179 @@
+/**
+ * Copyright 2021-2022 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "extendrt/kernel/ascend/model/dyn_shape_process.h"
+#include  // NOTE(review): header name missing - presumably lost in extraction
+#include "mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/op_base.h"
+#include "include/errorcode.h"
+
+namespace mindspore::kernel {
+namespace acl {
+namespace {
+constexpr auto kInputDimNum = 4;  // required rank of the image input
+constexpr auto kNHWCHeightIdx = 1;
+constexpr auto kNHWCWidthIdx = 2;
+constexpr auto kNCHWHeightIdx = 2;
+constexpr auto kNCHWWidthIdx = 3;
+constexpr auto kImageSizeHwNum = 2;  // number of int32 values (height, width) in the image-size input
+}  // namespace
+
+int DynShapeProcess::ProcDynamicInput(std::vector *const inputs) {  // appends the dynamic batch/image-size input
+  MS_CHECK_TRUE_MSG(acl_options_ != nullptr, lite::RET_ERROR, "Acl options ptr is nullptr.");
+  if (acl_options_->batch_size.empty() && acl_options_->image_size.empty()) {  // nothing dynamic: inputs untouched
+    MS_LOG(INFO) << "Inputs are not dynamic mode.";
+    return lite::RET_OK;
+  }
+  if (!acl_options_->batch_size.empty() && !acl_options_->image_size.empty()) {  // the two modes are exclusive
+    MS_LOG(ERROR) << "Batch size and image size can't be set at the same time.";
+    return lite::RET_ERROR;
+  }
+  MS_CHECK_TRUE_MSG(inputs != nullptr, lite::RET_ERROR, "Inputs is nullptr.");
+  if (!acl_options_->batch_size.empty()) {
+    if (AddBatchSizeInput(inputs) != lite::RET_OK) {
+      MS_LOG(ERROR) << "Add batch size input failed.";
+      return lite::RET_ERROR;
+    }
+  }
+  if (!acl_options_->image_size.empty()) {
+    if (AddImageSizeInput(inputs) != lite::RET_OK) {
+      MS_LOG(ERROR) << "Add Image size input failed.";
+      return lite::RET_ERROR;
+    }
+  }
+  return lite::RET_OK;
+}
+
+int DynShapeProcess::AddBatchSizeInput(std::vector *const inputs) {  // appends one tensor holding the batch size
+  int32_t *batch_size_addr = reinterpret_cast(malloc(sizeof(int32_t)));
+  if (batch_size_addr == nullptr) {
+    MS_LOG(ERROR) << "Malloc batch size failed.";
+    return lite::RET_ERROR;
+  }
+  if (GetRealBatchSize(inputs, batch_size_addr) != lite::RET_OK) {
+    MS_LOG(ERROR) << "Get real batch size failed.";
+    free(batch_size_addr);
+    return lite::RET_ERROR;
+  }
+  auto batch_size_ptr = std::make_shared(batch_size_addr, sizeof(int32_t));
+  if (batch_size_ptr == nullptr) {
+    MS_LOG(ERROR) << "Create Address failed.";
+    free(batch_size_addr);
+    return lite::RET_ERROR;
+  }
+  auto tensor_ptr = std::make_shared();
+  if (tensor_ptr == nullptr) {
+    MS_LOG(ERROR) << "Create KernelTensor failed.";
+    free(batch_size_addr);
+    return lite::RET_ERROR;
+  }
+
+  tensor_ptr->SetData(batch_size_ptr);  // NOTE(review): nothing in this file frees batch_size_addr afterwards - confirm owner
+  inputs->emplace_back(tensor_ptr);
+  return lite::RET_OK;
+}
+
+int DynShapeProcess::AddImageSizeInput(std::vector *const inputs) {  // appends one tensor holding {height, width}
+  int32_t *image_size_addr = reinterpret_cast(malloc(kImageSizeHwNum * sizeof(int32_t)));
+  if (image_size_addr == nullptr) {
+    MS_LOG(ERROR) << "Malloc image size failed.";
+    return lite::RET_ERROR;
+  }
+  if (GetRealImageSize(inputs, image_size_addr, kImageSizeHwNum) != lite::RET_OK) {
+    MS_LOG(ERROR) << "Get real image size failed.";
+    free(image_size_addr);
+    return lite::RET_ERROR;
+  }
+  auto image_size_ptr = std::make_shared(image_size_addr, kImageSizeHwNum * sizeof(int32_t));
+  if (image_size_ptr == nullptr) {
+    MS_LOG(ERROR) << "Create Address failed.";
+    free(image_size_addr);
+    return lite::RET_ERROR;
+  }
+  auto tensor_ptr = std::make_shared();
+  if (tensor_ptr == nullptr) {
+    MS_LOG(ERROR) << "Create KernelTensor failed.";
+    free(image_size_addr);
+    return lite::RET_ERROR;
+  }
+
+  tensor_ptr->SetData(image_size_ptr);  // NOTE(review): nothing in this file frees image_size_addr afterwards - confirm owner
+  inputs->emplace_back(tensor_ptr);
+  return lite::RET_OK;
+}
+
+int DynShapeProcess::GetRealBatchSize(std::vector *const inputs, int32_t *batch_size) {  // writes shape[0] to *batch_size
+  MS_CHECK_TRUE_MSG(batch_size != nullptr, lite::RET_ERROR, "Batch size ptr is nullptr.");
+  if (input_data_idx_ >= inputs->size()) {
+    MS_LOG(ERROR) << " Input data index " << input_data_idx_ << " is larger than input size " << inputs->size();
+    return lite::RET_ERROR;
+  }
+  auto tensor = (*inputs)[input_data_idx_];
+  std::vector shape = tensor->GetShapeVector();
+  if (shape.empty()) {
+    MS_LOG(ERROR) << "Shape is empty, input index = " << input_data_idx_;
+    return lite::RET_ERROR;
+  }
+  int32_t cur_batch_size = static_cast(shape[0]);
+  auto iter = acl_options_->batch_size.find(cur_batch_size);  // the batch must be one of the configured batch sizes
+  if (iter == acl_options_->batch_size.end()) {
+    MS_LOG(ERROR) << "Current batch size " << cur_batch_size << " is invalid, please check device info of context";
+    return lite::RET_ERROR;
+  }
+  *batch_size = cur_batch_size;
+  MS_LOG(DEBUG) << "Current batch size " << cur_batch_size;
+  return lite::RET_OK;
+}
+
+int DynShapeProcess::GetRealImageSize(std::vector *const inputs, int32_t *image_size, int32_t num) {  // writes {H, W}
+  MS_CHECK_TRUE_MSG(image_size != nullptr, lite::RET_ERROR, "Image size ptr is nullptr.");
+  if (input_data_idx_ >= inputs->size()) {
+    MS_LOG(ERROR) << "Input data index " << input_data_idx_ << " is larger than input size " << inputs->size();
+    return lite::RET_ERROR;
+  }
+  auto tensor = (*inputs)[input_data_idx_];
+  std::vector shape = tensor->GetShapeVector();
+  if (shape.size() != kInputDimNum) {
+    MS_LOG(ERROR) << "Shape size " << shape.size() << " is invalid, input index = " << input_data_idx_;
+    return lite::RET_ERROR;
+  }
+  auto format = tensor->GetFormat();
+  uint64_t height;
+  uint64_t width;
+  if (format == mindspore::Format::NHWC) {
+    height = shape[kNHWCHeightIdx];
+    width = shape[kNHWCWidthIdx];
+  } else {  // every format other than NHWC is read with the NCHW indices
+    height = shape[kNCHWHeightIdx];
+    width = shape[kNCHWWidthIdx];
+  }
+  auto cur_image_size = std::pair(static_cast(height), static_cast(width));
+  auto iter = acl_options_->image_size.find(cur_image_size);  // (H, W) must be one of the configured image sizes
+  if (iter == acl_options_->image_size.end()) {
+    MS_LOG(ERROR) << "Image size height " << height << ",weight " << width  // NOTE(review): "weight" looks like a typo for "width"
+                  << " is invalid, please check device info of context.";
+    return lite::RET_ERROR;
+  }
+  if (num != kImageSizeHwNum) {  // image_size must have room for exactly two values
+    MS_LOG(ERROR) << "The hw num should be " << kImageSizeHwNum << ",real num " << num;
+    return lite::RET_ERROR;
+  }
+  image_size[0] = height;
+  image_size[1] = width;
+  MS_LOG(DEBUG) << "Current height " << height << " width " << width;
+  return lite::RET_OK;
+}
+}  // namespace acl
+}  // namespace mindspore::kernel
diff --git a/mindspore/lite/src/extendrt/kernel/ascend/model/dyn_shape_process.h b/mindspore/lite/src/extendrt/kernel/ascend/model/dyn_shape_process.h
new file mode 100644
index 00000000000..1a1ebaebb0e
--- /dev/null
+++ b/mindspore/lite/src/extendrt/kernel/ascend/model/dyn_shape_process.h
@@ -0,0 +1,48 @@
+/**
+ * Copyright 2021-2022 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef MINDSPORE_LITE_SRC_EXTENDRT_KERNEL_ASCEND_MODEL_DYN_SHAPE_PROCESS_H
+#define MINDSPORE_LITE_SRC_EXTENDRT_KERNEL_ASCEND_MODEL_DYN_SHAPE_PROCESS_H
+
+#include  // NOTE(review): header name missing here and on the next line - presumably lost in extraction
+#include
+#include "extendrt/kernel/ascend/options/acl_model_options.h"
+#include "kernel/kernel.h"
+#include "include/api/types.h"
+
+namespace mindspore::kernel {
+namespace acl {
+class DynShapeProcess {  // helper exposing ProcDynamicInput; configured by ACL model options and an input index
+ public:
+  explicit DynShapeProcess(const AclModelOptionsPtr &options, size_t input_data_idx)
+      : acl_options_(options), input_data_idx_(input_data_idx) {}  // stores both arguments unchanged
+
+  int ProcDynamicInput(std::vector *const inputs);  // public entry point; takes the input list by pointer
+
+ private:
+  int AddBatchSizeInput(std::vector *const inputs);
+  int AddImageSizeInput(std::vector *const inputs);
+  int GetRealBatchSize(std::vector *const inputs, int32_t *batch_size);  // batch_size is an output pointer
+  int GetRealImageSize(std::vector *const inputs, int32_t *image_size, int32_t num);  // image_size is an output pointer
+
+  AclModelOptionsPtr acl_options_;  // options handed to the constructor
+  size_t input_data_idx_;  // input index handed to the constructor
+};
+
+using DynShapeProcPtr = std::shared_ptr;
+}  // namespace acl
+}  // namespace mindspore::kernel
+#endif  // MINDSPORE_LITE_SRC_EXTENDRT_KERNEL_ASCEND_MODEL_DYN_SHAPE_PROCESS_H
diff --git a/mindspore/lite/src/extendrt/kernel/ascend/model/model_infer.cc b/mindspore/lite/src/extendrt/kernel/ascend/model/model_infer.cc
new file mode 100644
index 00000000000..7dc1bb000fc
--- /dev/null
+++ b/mindspore/lite/src/extendrt/kernel/ascend/model/model_infer.cc
@@ -0,0 +1,170 @@
+/**
+ * Copyright 2021 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "extendrt/kernel/ascend/model/model_infer.h"
+#include "common/log_adapter.h"
+#include "acl/acl.h"
+
+namespace mindspore::kernel {
+namespace acl {
+// Keeps a copy of the offline model buffer and the options; no ACL call is made until Init().
+ModelInfer::ModelInfer(const Buffer &om_data, const AclModelOptionsPtr &options)
+    : init_flag_(false),
+      load_flag_(false),
+      device_type_("AscendCL"),
+      context_(nullptr),
+      om_data_(om_data),
+      options_(options),
+      model_process_(options),
+      acl_env_(nullptr) {}
+
+// Acquires the ACL environment, opens the configured device, creates a context and records the run mode.
+// Returns RET_OK immediately when already initialized. On any failure everything acquired so far is
+// released again, because Finalize() is a no-op while init_flag_ is false.
+STATUS ModelInfer::Init() {
+  if (init_flag_) {
+    MS_LOG(INFO) << "Acl has been initialized, skip.";
+    return lite::RET_OK;
+  }
+  if (options_ == nullptr) {
+    MS_LOG(ERROR) << "Acl options is nullptr.";
+    return lite::RET_ERROR;
+  }
+  acl_env_ = AclEnvGuard::GetAclEnv(options_->dump_cfg_path);
+  if (acl_env_ == nullptr) {
+    MS_LOG(ERROR) << "Acl init failed.";
+    return lite::RET_ERROR;
+  }
+  int32_t device_id = options_->device_id;
+  aclError ret = aclrtSetDevice(device_id);
+  if (ret != ACL_ERROR_NONE) {
+    MS_LOG(ERROR) << "Acl open device " << device_id << " failed.";
+    return lite::RET_ERROR;
+  }
+  MS_LOG(INFO) << "Open device " << device_id << " success.";
+
+  ret = aclrtCreateContext(&context_, device_id);
+  if (ret != ACL_ERROR_NONE) {
+    MS_LOG(ERROR) << "Acl create context failed.";
+    // Undo aclrtSetDevice: nobody else will reset the device while init_flag_ is false.
+    context_ = nullptr;
+    (void)aclrtResetDevice(device_id);
+    return lite::RET_ERROR;
+  }
+  MS_LOG(INFO) << "Create context success.";
+
+  aclrtRunMode run_mode;
+  ret = aclrtGetRunMode(&run_mode);
+  if (ret != ACL_ERROR_NONE) {
+    MS_LOG(ERROR) << "Acl get run mode failed.";
+    // Undo context creation and aclrtSetDevice, in reverse order of acquisition.
+    (void)aclrtDestroyContext(context_);
+    context_ = nullptr;
+    (void)aclrtResetDevice(device_id);
+    return lite::RET_ERROR;
+  }
+  bool is_device = (run_mode == ACL_DEVICE);
+  model_process_.SetIsDevice(is_device);
+  MS_LOG(INFO) << "Get run mode success is device input/output " << is_device;
+
+  MS_LOG(INFO) << "Init model success, device id " << device_id;
+  init_flag_ = true;
+  return lite::RET_OK;
+}
+
+// Unloads the model (if loaded), destroys the context and resets the device.
+// Returns early with an error when the context cannot be made current or the model cannot be unloaded;
+// failures while destroying the context or resetting the device are only logged.
+STATUS ModelInfer::Finalize() {
+  if (!init_flag_) {
+    MS_LOG(WARNING) << "Init is not ok, no need to finalize.";
+    return lite::RET_OK;
+  }
+
+  aclError rt_ret = aclrtSetCurrentContext(context_);
+  if (rt_ret != ACL_ERROR_NONE) {
+    MS_LOG(ERROR) << "Set the ascend device context failed.";
+    return lite::RET_ERROR;
+  }
+  if (load_flag_) {
+    auto ret = model_process_.UnLoad();
+    if (ret != lite::RET_OK) {
+      MS_LOG(ERROR) << "Unload model inner failed.";
+      return ret;
+    }
+  }
+  if (context_ != nullptr) {
+    rt_ret = aclrtDestroyContext(context_);
+    if (rt_ret != ACL_ERROR_NONE) {
+      MS_LOG(ERROR) << "Destroy context failed.";
+    }
+    context_ = nullptr;
+  }
+  MS_LOG(INFO) << "End to destroy context.";
+
+  rt_ret = aclrtResetDevice(options_->device_id);
+  if (rt_ret != ACL_ERROR_NONE) {
+    MS_LOG(ERROR) << "Reset device " << options_->device_id << " failed.";
+  }
+  MS_LOG(INFO) << "End to reset device " << options_->device_id;
+  init_flag_ = false;
+  load_flag_ = false;
+  return lite::RET_OK;
+}
+
+// Loads the model on the first call, then makes context_ current for the calling thread.
+STATUS ModelInfer::Load() {
+  if (!load_flag_) {
+    int ret = LoadAclModel(om_data_);
+    if (ret != lite::RET_OK) {
+      MS_LOG(ERROR) << "Load model model failed.";
+      return ret;
+    }
+    load_flag_ = true;
+  }
+
+  aclError rt_ret = aclrtSetCurrentContext(context_);
+  if (rt_ret != ACL_ERROR_NONE) {
+    MS_LOG(ERROR) << "Set the ascend device context failed, ret = " << rt_ret;
+    return lite::RET_ERROR;
+  }
+
+  return lite::RET_OK;
+}
+
+// Loads the offline model from memory and lets model_process_ prepare its input/output buffers.
+// The model is unloaded again when buffer preparation fails.
+STATUS ModelInfer::LoadAclModel(const Buffer &om_data) {
+  MS_LOG(INFO) << "Start load model model.";
+  // model load model
+  uint32_t acl_model_id;
+  auto acl_ret = aclmdlLoadFromMem(om_data.Data(), om_data.DataSize(), &acl_model_id);
+  if (acl_ret != ACL_ERROR_NONE) {
+    MS_LOG(ERROR) << "Call aclmdlLoadFromMem failed, ret = " << acl_ret;
+    return lite::RET_ERROR;
+  }
+
+  // model init model resource
+  model_process_.set_model_id(acl_model_id);
+  int ret = model_process_.PreInitModelResource();
+  if (ret != lite::RET_OK) {
+    (void)aclmdlUnload(acl_model_id);
+    MS_LOG(ERROR) << "Pre init model resource failed.";
+    return ret;
+  }
+
+  MS_LOG(INFO) << "Load model model success.";
+  return lite::RET_OK;
+}
+
+// Ensures the model is loaded and the context is current, then runs one prediction.
+STATUS ModelInfer::Inference(const std::vector &inputs, const std::vector &outputs) {
+  if (Load() != lite::RET_OK) {
+    MS_LOG(ERROR) << "Prepare model resource failed.";
+    return lite::RET_ERROR;
+  }
+
+  return model_process_.PredictFromHost(inputs, outputs);
+}
+
+// need to be called after model load;
+std::set ModelInfer::GetDynamicBatch() { return model_process_.GetDynamicBatch(); }
+
+// need to be called after model load;
+std::set> ModelInfer::GetDynamicImage() { return model_process_.GetDynamicImage(); }
+}  // namespace acl
+}  // namespace mindspore::kernel
diff --git a/mindspore/lite/src/extendrt/kernel/ascend/model/model_infer.h b/mindspore/lite/src/extendrt/kernel/ascend/model/model_infer.h
new file mode 100644
index 00000000000..06b1172a6fb
--- /dev/null
+++ b/mindspore/lite/src/extendrt/kernel/ascend/model/model_infer.h
@@ -0,0 +1,65 @@
+/**
+ * Copyright 2021 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef MINDSPORE_LITE_SRC_EXTENDRT_KERNEL_ASCEND_MODEL_MODEL_INFER_H_
+#define MINDSPORE_LITE_SRC_EXTENDRT_KERNEL_ASCEND_MODEL_MODEL_INFER_H_
+
+#include
+#include
+#include
+#include
+#include
+#include "extendrt/kernel/ascend/model/model_process.h"
+#include "extendrt/kernel/ascend/model/acl_env_guard.h"
+#include "extendrt/kernel/ascend/options/acl_model_options.h"
+#include "include/api/types.h"
+#include "include/errorcode.h"
+
+namespace mindspore::kernel {
+namespace acl {
+using mindspore::lite::STATUS;
+
+class ModelInfer {  // Owns the ACL device/context and one loaded offline model; forwards execution to ModelProcess.
+ public:
+  ModelInfer(const Buffer &om_data, const AclModelOptionsPtr &options);  // copies om_data; no ACL call here
+  ~ModelInfer() = default;  // does not call Finalize(); callers must release ACL resources explicitly
+
+  STATUS Init();      // acquires ACL env, opens options_->device_id, creates context_; no-op if already initialized
+  STATUS Finalize();  // unloads the model if loaded, destroys context_ and resets the device
+  STATUS Load();      // loads om_data_ on first call, then makes context_ current
+  STATUS Inference(const std::vector &inputs, const std::vector &outputs);  // Load() + PredictFromHost()
+  // need to be called after model load
+  std::set GetDynamicBatch();
+  // need to be called after model load
+  std::set> GetDynamicImage();
+
+ private:
+  STATUS LoadAclModel(const Buffer &om_data);  // aclmdlLoadFromMem + ModelProcess::PreInitModelResource
+
+  bool init_flag_;             // set by a successful Init(), cleared by Finalize()
+  bool load_flag_;             // set once the model has been loaded, cleared by Finalize()
+  std::string device_type_;    // initialized to "AscendCL" by the constructor
+  aclrtContext context_;       // created in Init(), destroyed in Finalize()
+  Buffer om_data_;             // offline model content handed to aclmdlLoadFromMem
+  AclModelOptionsPtr options_;  // supplies device_id and dump_cfg_path
+  ModelProcess model_process_;
+  std::shared_ptr acl_env_;  // result of AclEnvGuard::GetAclEnv, held for the lifetime of this object
+};
+
+using ModelInferPtr = std::shared_ptr;
+}  // namespace acl
+}  // namespace mindspore::kernel
+#endif  // MINDSPORE_LITE_SRC_EXTENDRT_KERNEL_ASCEND_MODEL_MODEL_INFER_H_
diff --git a/mindspore/lite/src/extendrt/kernel/ascend/model/model_process.cc b/mindspore/lite/src/extendrt/kernel/ascend/model/model_process.cc
new file mode 100644
index 00000000000..54665a70e69
--- /dev/null
+++ b/mindspore/lite/src/extendrt/kernel/ascend/model/model_process.cc
@@ -0,0 +1,642 @@
+/**
+ * Copyright 2021 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "extendrt/kernel/ascend/model/model_process.h"
+#include
+#include
+#include
+#include
+#include "common/log_adapter.h"
+#include "src/common/utils.h"
+#include "src/common/log_util.h"
+
+namespace mindspore::kernel {
+namespace acl {
+namespace {
+constexpr size_t kBatchSizeNum = 1;
+constexpr size_t kImageSizeHwNum = 2;
+}  // namespace
+// Maps an ACL data type to the framework TypeId; unknown types map to kNumberTypeEnd.
+static TypeId TransToDataType(aclDataType data_type) {
+  static const std::map data_type_map = {
+    {ACL_FLOAT16, TypeId::kNumberTypeFloat16}, {ACL_FLOAT, TypeId::kNumberTypeFloat32},
+    {ACL_DOUBLE, TypeId::kNumberTypeFloat64},  {ACL_INT8, TypeId::kNumberTypeInt8},
+    {ACL_INT16, TypeId::kNumberTypeInt16},     {ACL_INT32, TypeId::kNumberTypeInt32},
+    {ACL_INT64, TypeId::kNumberTypeInt64},     {ACL_UINT8, TypeId::kNumberTypeUInt8},
+    {ACL_UINT16, TypeId::kNumberTypeUInt16},   {ACL_UINT32, TypeId::kNumberTypeUInt32},
+    {ACL_UINT64, TypeId::kNumberTypeUInt64},   {ACL_BOOL, TypeId::kNumberTypeBool},
+  };
+  auto it = data_type_map.find(data_type);
+  if (it == data_type_map.end()) {
+    return TypeId::kNumberTypeEnd;
+  } else {
+    return it->second;
+  }
+}
+
+template
+inline static void ClearIfNotNull(T *vec) {
+  if (vec != nullptr) {
+    vec->clear();
+  }
+}
+
+template >
+inline static void PushbackIfNotNull(U *vec, T &&item) {
+  if (vec != nullptr) {
+    vec->emplace_back(item);
+  }
+}
+
+// Splits a list of AclTensorInfo into parallel vectors of names, shapes, data types and buffer sizes.
+// All four output pointers are required: the size consistency check below dereferences each of them.
+static STATUS ConstructTensorDesc(const std::vector &acl_tensor_list, std::vector *names,
+                                  std::vector> *shapes, std::vector *data_types,
+                                  std::vector *mem_sizes) {
+  if (names == nullptr || shapes == nullptr || data_types == nullptr || mem_sizes == nullptr) {
+    MS_LOG(ERROR) << "Output pointer of tensor desc is nullptr.";
+    return lite::RET_ERROR;
+  }
+  ClearIfNotNull(names);
+  ClearIfNotNull(shapes);
+  ClearIfNotNull(data_types);
+  ClearIfNotNull(mem_sizes);
+  for (size_t i = 0; i < acl_tensor_list.size(); ++i) {
+    const auto &info = acl_tensor_list[i];
+    PushbackIfNotNull(names, info.name);
+    PushbackIfNotNull(shapes, info.dims);
+    PushbackIfNotNull(data_types, TransToDataType(info.data_type));
+    PushbackIfNotNull(mem_sizes, info.buffer_size);
+  }
+
+  if (names->size() != acl_tensor_list.size() || shapes->size() != acl_tensor_list.size() ||
+      data_types->size() != acl_tensor_list.size() || mem_sizes->size() != acl_tensor_list.size()) {
+    MS_LOG(ERROR) << "Inner error, size do not match: names size " << names->size() << " shapes size " << shapes->size()
+                  << " data types size " << data_types->size() << " mem sizes size " << mem_sizes->size()
+                  << " acl_tensor_list size " << acl_tensor_list.size();
+    return lite::RET_ERROR;
+  }
+
+  return lite::RET_OK;
+}
+
+// Formats a shape as "[d0, d1, ...]" for log messages.
+static std::string ShapeToString(const std::vector &shape) {
+  std::string result = "[";
+  for (size_t i = 0; i < shape.size(); ++i) {
+    result += std::to_string(shape[i]);
+    if (i + 1 < shape.size()) {
+      result += ", ";
+    }
+  }
+  result += "]";
+  return result;
+}
+
+// Reads the description of the loaded model (model_id_) and creates the input and output buffers.
+STATUS ModelProcess::PreInitModelResource() {
+  model_desc_ = aclmdlCreateDesc();
+  if (model_desc_ == nullptr) {
+    MS_LOG(ERROR) << "Create model desc failed.";
+    return lite::RET_ERROR;
+  }
+  aclError acl_ret = aclmdlGetDesc(model_desc_, model_id_);
+  if (acl_ret != ACL_ERROR_NONE) {
+    MS_LOG(ERROR) << "Read model desc failed, ret = " << acl_ret;
+    // The desc is useless without model info; release it so a later retry does not leak it.
+    (void)aclmdlDestroyDesc(model_desc_);
+    model_desc_ = nullptr;
+    return lite::RET_ERROR;
+  }
+  STATUS ret = InitInputsBuffer();
+  if (ret != lite::RET_OK) {
+    MS_LOG(ERROR) << "Create input buffer failed.";
+    return ret;
+  }
+  ret = InitOutputsBuffer();
+  if (ret != lite::RET_OK) {
+    MS_LOG(ERROR) << "Create output buffer failed.";
+    return ret;
+  }
+  return lite::RET_OK;
+}
+
+// Returns the batch sizes the model was built for; an empty set on any failure.
+std::set ModelProcess::GetDynamicBatch() {
+  if (model_desc_ == nullptr) {
+    MS_LOG(ERROR) << " Model desc is nullptr.";
+    return std::set();
+  }
+  aclmdlBatch dynamic_batch;
+  if (aclmdlGetDynamicBatch(model_desc_, &dynamic_batch) != ACL_SUCCESS) {
+    MS_LOG(ERROR) << "Failed to get dynamic batch.";
+    return std::set();
+  }
+  size_t batch_count = dynamic_batch.batchCount;
+  if (batch_count > ACL_MAX_BATCH_NUM) {
+    MS_LOG(ERROR) << "Real batch count " << batch_count << " is larger than max " << ACL_MAX_BATCH_NUM;
+    return std::set();
+  }
+  std::set batch;
+  for (size_t i = 0; i < dynamic_batch.batchCount; ++i) {
+    batch.insert(dynamic_batch.batch[i]);
+  }
+  return batch;
+}
+
+// Returns the (height, width) pairs the model was built for (queried for input 0); an empty set on any failure.
+std::set> ModelProcess::GetDynamicImage() {
+  if (model_desc_ == nullptr) {
+    MS_LOG(ERROR) << " Model desc is nullptr.";
+    return std::set>();
+  }
+  aclmdlHW dynamic_hw;
+  if (aclmdlGetDynamicHW(model_desc_, 0, &dynamic_hw) != ACL_SUCCESS) {
+    MS_LOG(ERROR) << "Failed to get dynamic hw.";
+    return std::set>();
+  }
+  size_t hw_count = dynamic_hw.hwCount;
+  if (hw_count > ACL_MAX_HW_NUM) {
+    MS_LOG(ERROR) << "Real hw count " << hw_count << " is larger than max " << ACL_MAX_HW_NUM;
+    return std::set>();
+  }
+  std::set> image;
+  for (size_t i = 0; i < dynamic_hw.hwCount; ++i) {
+    image.insert(std::pair(dynamic_hw.hw[i][0], dynamic_hw.hw[i][1]));
+  }
+  return image;
+}
+
+// Records one AclTensorInfo per model input. When not running on device, a device buffer of the input's
+// size is allocated for each input; otherwise the recorded buffer pointers stay nullptr.
+STATUS ModelProcess::InitInputsBuffer() {
+  aclError ret;
+  size_t input_size = aclmdlGetNumInputs(model_desc_);
+  MS_LOG(INFO) << "input_size = " << input_size;
+  for (size_t i = 0; i < input_size; ++i) {
+    auto buffer_size = aclmdlGetInputSizeByIndex(model_desc_, i);
+    void *data_mem_buffer = nullptr;
+    if (!is_run_on_device_) {  // need to copy input/output to/from device
+      ret = aclrtMalloc(&data_mem_buffer, buffer_size, ACL_MEM_MALLOC_NORMAL_ONLY);
+      if (ret != ACL_ERROR_NONE) {
+        MS_LOG(ERROR) << "Malloc device input buffer failed , input size " << buffer_size;
+        return lite::RET_ERROR;
+      }
+    }
+
+    aclmdlIODims dims;
+    ret = aclmdlGetInputDims(model_desc_, i, &dims);
+    if (ret != ACL_ERROR_NONE) {
+      MS_LOG(ERROR) << "Get input shape failed, ret = " << ret;
+      if (!is_run_on_device_) {
+        aclrtFree(data_mem_buffer);
+      }
+      return lite::RET_ERROR;
+    }
+    aclDataType data_type = aclmdlGetInputDataType(model_desc_, i);
+    std::vector shape(dims.dims, dims.dims + dims.dimCount);
+    std::string input_name = aclmdlGetInputNameByIndex(model_desc_, i);
+    if (input_name.empty()) {
+      MS_LOG(WARNING) << "Get name of input " << i << " failed.";
+    }
+    MS_LOG(INFO) << "Name of input " << i << " is " << input_name;
+    input_infos_.emplace_back(
+      AclTensorInfo{data_mem_buffer, data_mem_buffer, buffer_size, data_type, shape, input_name});
+  }
+  MS_LOG(INFO) << "Create model inputs success";
+  return lite::RET_OK;
+}
+
+// Allocates buffer_size bytes (device memory when not running on device, host memory otherwise), wraps the
+// memory in an ACL data buffer and appends it to dataset. The memory is freed again on failure.
+STATUS ModelProcess::CreateDataBuffer(void **data_mem_buffer, size_t buffer_size, aclmdlDataset *dataset) {
+  if (data_mem_buffer == nullptr) {
+    MS_LOG(ERROR) << "Data mem buffer is nullptr.";
+    return lite::RET_ERROR;
+  }
+  aclError ret;
+  auto free_data_buffer = [this](void *dataMemBuffer) {
+    if (!is_run_on_device_) {
+      (void)aclrtFree(dataMemBuffer);
+    } else {
+      (void)aclrtFreeHost(dataMemBuffer);
+    }
+  };
+
+  if (!is_run_on_device_) {
+    ret = aclrtMalloc(data_mem_buffer, buffer_size, ACL_MEM_MALLOC_NORMAL_ONLY);
+    if (ret != ACL_ERROR_NONE) {
+      MS_LOG(ERROR) << "Malloc device buffer failed , buffer size " << buffer_size;
+      return lite::RET_ERROR;
+    }
+  } else {
+    ret = aclrtMallocHost(data_mem_buffer, buffer_size);
+    if (ret != ACL_ERROR_NONE) {
+      MS_LOG(ERROR) << "Malloc host buffer failed , buffer size " << buffer_size;
+      return lite::RET_ERROR;
+    }
+  }
+
+  auto data_buffer = aclCreateDataBuffer(*data_mem_buffer, buffer_size);
+  if (data_buffer == nullptr) {
+    MS_LOG(ERROR) << "Create Data Buffer failed";
+    free_data_buffer(*data_mem_buffer);
+    return lite::RET_ERROR;
+  }
+  ret = aclmdlAddDatasetBuffer(dataset, data_buffer);
+  if (ret != ACL_ERROR_NONE) {
+    MS_LOG(ERROR) << "add data buffer failed";
+    free_data_buffer(*data_mem_buffer);
+    aclDestroyDataBuffer(data_buffer);
+    return lite::RET_ERROR;
+  }
+  return lite::RET_OK;
+}
+
+// Creates the output dataset with one buffer per model output and records one AclTensorInfo per output.
+STATUS ModelProcess::InitOutputsBuffer() {
+  aclError ret;
+  outputs_ = aclmdlCreateDataset();
+  if (outputs_ == nullptr) {
+    MS_LOG(ERROR) << "Create output dataset failed";
+    return lite::RET_ERROR;
+  }
+  size_t output_size = aclmdlGetNumOutputs(model_desc_);
+  MS_LOG(INFO) << "Output_size = " << output_size;
+  for (size_t i = 0; i < output_size; ++i) {
+    auto buffer_size = aclmdlGetOutputSizeByIndex(model_desc_, i);
+
+    void *data_mem_buffer = nullptr;
+    if (CreateDataBuffer(&data_mem_buffer, buffer_size, outputs_) != lite::RET_OK) {
+      MS_LOG(ERROR) << "Add output data buffer failed, buffer size " << buffer_size;
+      return lite::RET_ERROR;
+    }
+    aclmdlIODims dims;
+    ret = aclmdlGetOutputDims(model_desc_, i, &dims);
+    if (ret != ACL_ERROR_NONE) {
+      MS_LOG(ERROR) << "Get output shape failed";
+      if (!is_run_on_device_) {
+        aclrtFree(data_mem_buffer);
+      } else {
+        aclrtFreeHost(data_mem_buffer);
+      }
+      // This is a failure: reporting RET_OK here would leave output_infos_ shorter than the model's outputs.
+      return lite::RET_ERROR;
+    }
+    aclFormat format = aclmdlGetOutputFormat(model_desc_, i);
+    MS_LOG(DEBUG) << "The output format of om is " << format;
+    aclDataType data_type = aclmdlGetOutputDataType(model_desc_, i);
+    std::vector shape(dims.dims, dims.dims + dims.dimCount);
+    std::string output_name = aclmdlGetOutputNameByIndex(model_desc_, i);
+    if (output_name.empty()) {
+      MS_LOG(WARNING) << "Get name of output " << i << " failed.";
+    }
+    MS_LOG(INFO) << "Name of om output " << i << " is " << output_name << "Buffer size " << buffer_size;
+    output_infos_.emplace_back(
+      AclTensorInfo{data_mem_buffer, data_mem_buffer, buffer_size, data_type, shape, output_name});
+  }
+  MS_LOG(INFO) << "Create model output success.";
+  return lite::RET_OK;
+}
+
+// Destroys the per-inference input dataset and its data buffer wrappers (not the memory they point to).
+void ModelProcess::DestroyInputsDataset() {
+  if (inputs_ == nullptr) {
+    return;
+  }
+  for (size_t i = 0; i < aclmdlGetDatasetNumBuffers(inputs_); i++) {
+    auto dataBuffer = aclmdlGetDatasetBuffer(inputs_, i);
+    aclDestroyDataBuffer(dataBuffer);
+  }
+  aclmdlDestroyDataset(inputs_);
+  inputs_ = nullptr;
+}
+
+// Frees the device memory of all inputs (only allocated when not running on device) and clears input_infos_.
+void ModelProcess::DestroyInputsDataMem() {
+  if (!is_run_on_device_) {
+    for (const auto &item : input_infos_) {
+      aclrtFree(item.device_data);
+    }
+  }
+  input_infos_.clear();
+}
+
+void ModelProcess::DestroyInputsBuffer() {
+  DestroyInputsDataMem();
+  DestroyInputsDataset();
+}
+
+// Frees the memory of all outputs, clears output_infos_ and destroys the output dataset.
+void ModelProcess::DestroyOutputsBuffer() {
+  for (const auto &item : output_infos_) {
+    if (!is_run_on_device_) {
+      aclrtFree(item.device_data);
+    } else {
+      aclrtFreeHost(item.device_data);
+    }
+  }
+  output_infos_.clear();
+
+  if (outputs_ == nullptr) {
+    return;
+  }
+  for (size_t i = 0; i < aclmdlGetDatasetNumBuffers(outputs_); i++) {
+    auto dataBuffer = aclmdlGetDatasetBuffer(outputs_, i);
+    aclDestroyDataBuffer(dataBuffer);
+  }
+  aclmdlDestroyDataset(outputs_);
+  outputs_ = nullptr;
+}
+
+// Unloads the model, destroys its description and releases all input/output buffers.
+STATUS ModelProcess::UnLoad() {
+  auto ret = aclmdlUnload(model_id_);
+  if (ret != ACL_ERROR_NONE) {
+    MS_LOG(ERROR) << "Unload model failed, ret = " << ret;
+    return lite::RET_ERROR;
+  }
+  if (model_desc_ != nullptr) {
+    ret = aclmdlDestroyDesc(model_desc_);
+    if (ret != ACL_ERROR_NONE) {
+      MS_LOG(ERROR) << "Unload model failed, ret = " << ret;
+      return lite::RET_ERROR;
+    }
+    model_desc_ = nullptr;
+  }
+  DestroyInputsBuffer();
+  DestroyOutputsBuffer();
+  MS_LOG(INFO) << "End unload model " << model_id_;
+  return lite::RET_OK;
+}
+
+// Applies the dynamic batch size carried by the last tensor of inputs to the current input dataset.
+// The recorded buffer size of every input is refreshed from the given tensors first.
+STATUS ModelProcess::SetBatchSize(const std::vector &inputs) {
+  // inputs[inputs.size() - 1] and input_infos_[i] are indexed below; reject counts that would run out of range.
+  if (inputs.empty() || inputs.size() > input_infos_.size()) {
+    MS_LOG(ERROR) << "Input count " << inputs.size() << " is invalid, model input count " << input_infos_.size();
+    return lite::RET_ERROR;
+  }
+  for (size_t i = 0; i < inputs.size(); i++) {
+    input_infos_[i].buffer_size = inputs[i]->GetData()->size;
+  }
+  auto batch_size_tensor = inputs[inputs.size() - 1];
+  size_t data_type_size = lite::DataTypeSize(batch_size_tensor->GetDtype());
+  size_t num = 0;
+  if (data_type_size != 0) {
+    num = batch_size_tensor->GetData()->size / data_type_size;
+  }
+  if (num != kBatchSizeNum) {
+    MS_LOG(ERROR) << "Batch size num should be " << kBatchSizeNum;
+    return lite::RET_ERROR;
+  }
+  auto *ptr = reinterpret_cast(batch_size_tensor->GetData()->addr);
+  CHECK_NULL_RETURN(ptr);
+  auto batch_size = ptr[0];
+  aclError ret;
+  size_t index;
+  ret = aclmdlGetInputIndexByName(model_desc_, ACL_DYNAMIC_TENSOR_NAME, &index);
+  if (ret != ACL_ERROR_NONE) {
+    MS_LOG(ERROR) << "Get index failed";
+    return lite::RET_ERROR;
+  }
+  MS_LOG(INFO) << "Set Batch size(" << batch_size << ") of input " << index << ".";
+  ret = aclmdlSetDynamicBatchSize(model_id_, inputs_, index, batch_size);
+  if (ret != ACL_ERROR_NONE) {
+    MS_LOG(ERROR) << "Set dynamic batch size failed, model_id is " << model_id_;
+    return lite::RET_ERROR;
+  }
+  return lite::RET_OK;
+}
+
+// Applies the dynamic image size (height, width) carried by the last tensor of inputs to the current
+// input dataset. The recorded buffer size of every input is refreshed from the given tensors first.
+STATUS ModelProcess::SetImageSize(const std::vector &inputs) {
+  // inputs[inputs.size() - 1] and input_infos_[i] are indexed below; reject counts that would run out of range.
+  if (inputs.empty() || inputs.size() > input_infos_.size()) {
+    MS_LOG(ERROR) << "Input count " << inputs.size() << " is invalid, model input count " << input_infos_.size();
+    return lite::RET_ERROR;
+  }
+  for (size_t i = 0; i < inputs.size(); i++) {
+    input_infos_[i].buffer_size = inputs[i]->GetData()->size;
+  }
+  auto image_size_tensor = inputs[inputs.size() - 1];
+  size_t data_type_size = lite::DataTypeSize(image_size_tensor->GetDtype());
+  size_t num = 0;
+  if (data_type_size != 0) {
+    num = image_size_tensor->GetData()->size / data_type_size;
+  }
+  if (num != kImageSizeHwNum) {
+    MS_LOG(ERROR) << "Image size hw num should be " << kImageSizeHwNum;
+    return lite::RET_ERROR;
+  }
+  auto *hw = reinterpret_cast(image_size_tensor->GetData()->addr);
+  CHECK_NULL_RETURN(hw);
+  int32_t height = hw[0];
+  int32_t width = hw[1];
+  size_t index;
+  aclError ret = ACL_ERROR_NONE;
+  ret = aclmdlGetInputIndexByName(model_desc_, ACL_DYNAMIC_TENSOR_NAME, &index);
+  if (ret != ACL_ERROR_NONE) {
+    MS_LOG(ERROR) << "Get index failed";
+    return lite::RET_ERROR;
+  }
+  MS_LOG(INFO) << "Set Image size(" << height << "," << width << ") of input " << index << ".";
+  ret = aclmdlSetDynamicHWSize(model_id_, inputs_, index, height, width);
+  if (ret != ACL_ERROR_NONE) {
+    MS_LOG(ERROR) << "Set dynamic image size failed, model_id is " << model_id_;
+    return lite::RET_ERROR;
+  }
+  return lite::RET_OK;
+}
+
+// Validates the given tensors against the recorded tensor info. The tensor count is always checked.
+// For static-shape models a shape mismatch is only a warning, while a data type or byte size mismatch is
+// an error; for dynamic-shape models the per-tensor checks are skipped.
+STATUS ModelProcess::CheckTensorByTensorInfo(const std::vector &tensor,
+                                             const std::vector &tensor_info) {
+  // Callers index tensor[i] for every entry of tensor_info, so a shorter tensor list would be read out of range.
+  if (tensor.size() < tensor_info.size()) {
+    MS_LOG(ERROR) << "Given tensor count " << tensor.size() << " is less than required count " << tensor_info.size();
+    return lite::RET_ERROR;
+  }
+  if (!IsDynamicShape()) {
+    for (size_t i = 0; i < tensor_info.size(); ++i) {
+      if (tensor[i]->GetShapeVector() != tensor_info[i].dims) {
+        MS_LOG(WARNING) << "Note: input " << i << " shape not match, required " << ShapeToString(tensor_info[i].dims)
+                        << ", given " << ShapeToString(tensor[i]->GetShapeVector()) << "."
+                        << "Please check input shape has been modified by DVPP method.";
+      }
+      if (tensor[i]->GetDtype() != TransToDataType(tensor_info[i].data_type)) {
+        MS_LOG(ERROR) << "Note: input " << i << " data type not match, required "
+                      << static_cast(TransToDataType(tensor_info[i].data_type)) << ", given "
+                      << static_cast(tensor[i]->GetDtype());
+        return lite::RET_ERROR;
+      }
+      if (tensor[i]->GetData()->size != tensor_info[i].buffer_size) {
+        MS_LOG(ERROR) << "Input " << i << " data size not match, required size " << tensor_info[i].buffer_size
+                      << ", given count " << tensor[i]->GetData()->size;
+        return lite::RET_ERROR;
+      }
+    }
+  }
+  return lite::RET_OK;
+}
+
+// For dynamic-shape models: applies the batch size and/or image size, then refreshes the output sizes.
+STATUS ModelProcess::ProcDynamicShape(const std::vector &inputs) {
+  if (!IsDynamicShape()) {
+    MS_LOG(DEBUG) << "Input is not dynamic shape";
+    return lite::RET_OK;
+  }
+  if (IsDynamicBatchSize()) {
+    if (SetBatchSize(inputs) != lite::RET_OK) {
+      MS_LOG(ERROR) << "Set dynamic batch size failed.";
+      return lite::RET_ERROR;
+    }
+  }
+  if (IsDynamicImageSize()) {
+    if (SetImageSize(inputs) != lite::RET_OK) {
+      MS_LOG(ERROR) << "Set dynamic image size failed.";
+      return lite::RET_ERROR;
+    }
+  }
+  if (ResetOutputSize() != lite::RET_OK) {
+    MS_LOG(ERROR) << "Reset output size failed";
+    return lite::RET_ERROR;
+  }
+  return lite::RET_OK;
+}
+
+bool ModelProcess::IsDynamicShape() { return IsDynamicBatchSize() || IsDynamicImageSize(); }
+
+bool ModelProcess::IsDynamicBatchSize() { return !GetDynamicBatch().empty(); }
+
+bool ModelProcess::IsDynamicImageSize() { return !GetDynamicImage().empty(); }
+
+// Builds the input dataset for one inference: validates the inputs, copies them to device memory when not
+// running on device (otherwise the host pointers are used directly) and applies dynamic shape settings.
+// On failure the partially built dataset is left in inputs_; the caller destroys it.
+STATUS ModelProcess::CheckAndInitInput(const std::vector &inputs) {
+  aclError ret;
+  inputs_ = aclmdlCreateDataset();
+  if (inputs_ == nullptr) {
+    MS_LOG(ERROR) << "Create input dataset failed";
+    return lite::RET_ERROR;
+  }
+  // check inputs
+  if (CheckTensorByTensorInfo(inputs, input_infos_) != lite::RET_OK) {
+    MS_LOG(ERROR) << "Check input tensor failed.";
+    return lite::RET_ERROR;
+  }
+  // copy inputs
+  for (size_t i = 0; i < input_infos_.size(); ++i) {
+    auto &info = input_infos_[i];
+    auto input = inputs[i];
+    void *data = input->GetData()->addr;
+    void *input_buffer = nullptr;
+    if (!is_run_on_device_) {
+      info.cur_device_data = info.device_data;
+      ret =
+        aclrtMemcpy(info.cur_device_data, info.buffer_size, data, input->GetData()->size, ACL_MEMCPY_HOST_TO_DEVICE);
+      if (ret != ACL_ERROR_NONE) {
+        MS_LOG(ERROR) << "Acl memcpy input " << i
+                      << " data to device failed, src input size: " << input->GetData()->size
+                      << ", dst device buffer size: " << info.buffer_size;
+        return lite::RET_ERROR;
+      }
+      input_buffer = info.cur_device_data;
+    } else {
+      input_buffer = data;
+    }
+    auto data_buffer = aclCreateDataBuffer(input_buffer, info.buffer_size);
+    if (data_buffer == nullptr) {
+      MS_LOG(ERROR) << "Create Data Buffer failed";
+      return lite::RET_ERROR;
+    }
+    ret = aclmdlAddDatasetBuffer(inputs_, data_buffer);
+    if (ret != ACL_ERROR_NONE) {
+      MS_LOG(ERROR) << "Add data buffer failed";
+      aclDestroyDataBuffer(data_buffer);
+      return lite::RET_ERROR;
+    }
+  }
+  if (ProcDynamicShape(inputs) != lite::RET_OK) {
+    MS_LOG(ERROR) << "Proc input dynamic shape failed.";
+    return lite::RET_ERROR;
+  }
+  return lite::RET_OK;
+}
+
+// Recomputes dims and buffer_size of every output from the model's current output dims.
+STATUS ModelProcess::ResetOutputSize() {
+  aclDataType output_type;
+  aclError ret;
+  size_t output_size = aclmdlGetNumOutputs(model_desc_);
+  for (size_t index = 0; index < output_size; index++) {
+    size_t dims = 1;
+    struct aclmdlIODims output_dims;
+    ret = aclmdlGetCurOutputDims(model_desc_, index, &output_dims);
+    if (ret != ACL_ERROR_NONE) {
+      MS_LOG(ERROR) << "get output dim error.";
+      return lite::RET_ERROR;
+    }
+    std::vector shape(output_dims.dims, output_dims.dims + output_dims.dimCount);
+    for (size_t i = 0; i < output_dims.dimCount; i++) {
+      dims *= output_dims.dims[i];
+    }
+    output_type = aclmdlGetOutputDataType(model_desc_, index);
+    output_infos_[index].dims = shape;
+    output_infos_[index].buffer_size = dims * aclDataTypeSize(output_type);
+  }
+  return lite::RET_OK;
+}
+
+// Runs one inference: prepares the input dataset, executes the model and copies the results into outputs.
+// The input dataset is destroyed on every path. Execution time is logged when GLOG_v starts with '1'.
+STATUS ModelProcess::PredictFromHost(const std::vector &inputs,
+                                     const std::vector &outputs) {
+  STATUS ret = CheckAndInitInput(inputs);
+  if (ret != lite::RET_OK) {
+    MS_LOG(ERROR) << "Check or init input failed";
+    DestroyInputsDataset();
+    return ret;  // forward status error
+  }
+
+  aclError acl_ret;
+  auto env = std::getenv("GLOG_v");
+  if (env != nullptr && env[0] == '1') {
+    struct timeval start_time;
+    struct timeval end_time;
+    (void)gettimeofday(&start_time, nullptr);
+    acl_ret = aclmdlExecute(model_id_, inputs_, outputs_);
+    (void)gettimeofday(&end_time, nullptr);
+    constexpr uint64_t kUSecondInSecond = 1000000;
+    uint64_t cost =
+      (kUSecondInSecond * static_cast(end_time.tv_sec) + static_cast(end_time.tv_usec)) -
+      (kUSecondInSecond * static_cast(start_time.tv_sec) + static_cast(start_time.tv_usec));
+    MS_LOG(INFO) << "Model execute in " << cost << " us";
+  } else {
+    acl_ret = aclmdlExecute(model_id_, inputs_, outputs_);
+  }
+
+  DestroyInputsDataset();
+  if (acl_ret != ACL_ERROR_NONE) {
+    MS_LOG(ERROR) << "Execute Model Failed, ret = " << acl_ret;
+    return lite::RET_ERROR;
+  }
+  ret = GetOutputs(outputs);
+  if (ret != lite::RET_OK) {
+    MS_LOG(ERROR) << "Build outputs failed";
+    return ret;
+  }
+  MS_LOG(INFO) << "Execute model success";
+  return lite::RET_OK;
+}
+
+// Copies the model results into the caller's output tensors; fails when outputs is empty.
+STATUS ModelProcess::GetOutputs(const std::vector &outputs) {
+  if (outputs.empty()) {
+    MS_LOG(ERROR) << "Ms tensor outputs is empty.";
+    return lite::RET_ERROR;
+  }
+
+  if (ConstructTensor(outputs) != lite::RET_OK) {
+    MS_LOG(ERROR) << "Construct ms tensor failed.";
+    return lite::RET_ERROR;
+  }
+  return lite::RET_OK;
+}
+
+// Checks that outputs matches the model outputs in count and byte size, then copies each output buffer
+// into the corresponding tensor.
+STATUS ModelProcess::ConstructTensor(const std::vector &outputs) {
+  if (outputs.size() != output_infos_.size()) {
+    MS_LOG(ERROR) << "Actual tensor count not match, required count " << output_infos_.size() << ", given count "
+                  << outputs.size();
+    return lite::RET_ERROR;
+  }
+  std::vector names;
+  std::vector> shapes;
+  std::vector data_types;
+  std::vector mem_sizes;
+  if (ConstructTensorDesc(output_infos_, &names, &shapes, &data_types, &mem_sizes) != lite::RET_OK) {
+    MS_LOG(ERROR) << "Construct tensor desc failed.";
+    return lite::RET_ERROR;
+  }
+  // set output info and malloc data size
+  for (size_t i = 0; i < output_infos_.size(); ++i) {
+    if (outputs[i]->GetData()->size != mem_sizes[i]) {
+      MS_LOG(ERROR) << "Ms tensor size " << outputs[i]->GetData()->size << " not match model tensor size "
+                    << mem_sizes[i];
+      return lite::RET_ERROR;
+    }
+  }
+  aclrtMemcpyKind kind = is_run_on_device_ ? ACL_MEMCPY_HOST_TO_HOST : ACL_MEMCPY_DEVICE_TO_HOST;
+  for (size_t i = 0; i < output_infos_.size(); ++i) {
+    if (output_infos_[i].cur_device_data == nullptr) {
+      // when run on device, cur_device_data is nullptr before first execute
+      continue;
+    }
+    auto ret = aclrtMemcpy(outputs[i]->GetData()->addr, outputs[i]->GetData()->size, output_infos_[i].cur_device_data,
+                           output_infos_[i].buffer_size, kind);
+    if (ret != ACL_ERROR_NONE) {
+      MS_LOG(ERROR) << "Memcpy output " << i << " from " << (is_run_on_device_ ? "host" : "device")
+                    << " to host failed, memory size " << output_infos_[i].buffer_size;
+      return lite::RET_ERROR;
+    }
+  }
+  return lite::RET_OK;
+}
+}  // namespace acl
+}  // namespace mindspore::kernel
diff --git a/mindspore/lite/src/extendrt/kernel/ascend/model/model_process.h b/mindspore/lite/src/extendrt/kernel/ascend/model/model_process.h
new file mode 100644
index 00000000000..0518f899139
--- /dev/null
+++ b/mindspore/lite/src/extendrt/kernel/ascend/model/model_process.h
@@ -0,0 +1,104 @@
+/**
+ * Copyright 2021 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef MINDSPORE_LITE_SRC_EXTENDRT_KERNEL_ASCEND_MODEL_MODEL_PROCESS_H_
+#define MINDSPORE_LITE_SRC_EXTENDRT_KERNEL_ASCEND_MODEL_MODEL_PROCESS_H_
+
+#include
+#include
+#include
+#include
+#include
+#include "acl/acl.h"
+#include "acl/acl_mdl.h"
+#include "acl/acl_rt.h"
+#include "include/api/types.h"
+#include "include/errorcode.h"
+#include "kernel/kernel.h"
+#include "extendrt/kernel/ascend/options/acl_model_options.h"
+
+namespace mindspore::kernel {
+namespace acl {
+using mindspore::lite::STATUS;
+struct AclTensorInfo {  // Per-tensor record of one model input or output.
+  void *cur_device_data;  // buffer used for the current execution; nullptr for inputs when running on device
+  void *device_data;      // buffer allocated at init time and released by the Destroy*Buffer helpers
+  size_t buffer_size;     // byte size; refreshed for dynamic-shape models
+  aclDataType data_type;
+  std::vector dims;
+  std::string name;
+};
+
+class ModelProcess {  // Holds the ACL model description and I/O buffers of one loaded model and executes it.
+ public:
+  explicit ModelProcess(const AclModelOptionsPtr &options)
+      : options_(options),
+        model_id_(0xffffffff),
+        is_run_on_device_(false),
+        model_desc_(nullptr),
+        inputs_(nullptr),
+        outputs_(nullptr),
+        input_infos_(),
+        output_infos_() {}
+  ~ModelProcess() {}  // releases nothing; UnLoad() frees the ACL resources
+
+  STATUS UnLoad();  // unloads the model, destroys the model desc and all input/output buffers
+  STATUS PredictFromHost(const std::vector &inputs, const std::vector &outputs);
+  STATUS PreInitModelResource();  // reads the model desc for model_id_ and creates input/output buffers
+
+  // when true, input host pointers are handed to ACL directly and no device input memory is allocated or copied
+  void SetIsDevice(bool is_device) { is_run_on_device_ = is_device; }
+
+  void set_model_id(uint32_t model_id) { model_id_ = model_id; }
+  uint32_t model_id() const { return model_id_; }
+  std::set GetDynamicBatch();           // empty when model_desc_ is nullptr or the query fails
+  std::set> GetDynamicImage();  // empty when model_desc_ is nullptr or the query fails
+
+ private:
+  STATUS CreateDataBuffer(void **data_mem_buffer, size_t buffer_size, aclmdlDataset *dataset);
+  STATUS CheckAndInitInput(const std::vector &inputs);
+  STATUS CheckTensorByTensorInfo(const std::vector &tensor,
+                                 const std::vector &tensor_info);
+  STATUS GetOutputs(const std::vector &outputs);
+  STATUS ConstructTensor(const std::vector &outputs);
+  STATUS SetBatchSize(const std::vector &inputs);  // batch size is read from the last tensor of inputs
+  STATUS SetImageSize(const std::vector &inputs);  // (height, width) is read from the last tensor of inputs
+  STATUS InitInputsBuffer();
+  STATUS InitOutputsBuffer();
+  STATUS ResetOutputSize();
+  STATUS ProcDynamicShape(const std::vector &inputs);
+  std::string VectorToString(const std::vector &);  // NOTE(review): no definition in model_process.cc - confirm use
+  bool IsDynamicShape();
+  bool IsDynamicBatchSize();
+  bool IsDynamicImageSize();
+  void DestroyInputsDataset();
+  void DestroyInputsDataMem();
+  void DestroyInputsBuffer();
+  void DestroyOutputsBuffer();
+
+  AclModelOptionsPtr options_;  // NOTE(review): stored but not read anywhere in model_process.cc
+  uint32_t model_id_;
+  // if run on device(AICPU), there is no need to alloc device memory and copy inputs to(/outputs from) device
+  bool is_run_on_device_;
+  aclmdlDesc *model_desc_;
+  aclmdlDataset *inputs_;   // created per inference in CheckAndInitInput, destroyed after execution
+  aclmdlDataset *outputs_;  // created once in InitOutputsBuffer
+  std::vector input_infos_;
+  std::vector output_infos_;
+};
+}  // namespace acl
+}  // namespace mindspore::kernel
+#endif  // MINDSPORE_LITE_SRC_EXTENDRT_KERNEL_ASCEND_MODEL_MODEL_PROCESS_H_
diff --git a/mindspore/lite/src/extendrt/kernel/ascend/options/acl_model_options.h b/mindspore/lite/src/extendrt/kernel/ascend/options/acl_model_options.h
new file mode 100644
index 00000000000..15f05b429e7
--- /dev/null
+++ b/mindspore/lite/src/extendrt/kernel/ascend/options/acl_model_options.h
@@ -0,0 +1,39 @@
+/**
+ * Copyright 2021 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
namespace mindspore::kernel {
namespace acl {
/// Options handed to the Ascend ACL kernel.
///
/// A freshly constructed instance targets device 0 and has an empty dump
/// configuration path, an empty batch-size set and an empty image-size set.
struct AclModelOptions {
  int32_t device_id = 0;
  std::string dump_cfg_path;
  std::set<uint64_t> batch_size;
  std::set<std::pair<uint64_t, uint64_t>> image_size;

  // Kept user-provided so the type stays a non-aggregate, as before.
  AclModelOptions() {}
};

using AclModelOptionsPtr = std::shared_ptr<AclModelOptions>;
}  // namespace acl
}  // namespace mindspore::kernel
+ */ + +#include "extendrt/kernel/ascend/options/acl_options_parser.h" +#include +#include +#include "common/log_adapter.h" +#include "src/common/log_util.h" +#include "src/common/utils.h" +#include "acl/acl_base.h" +#include "acl/acl_rt.h" + +namespace mindspore::kernel { +namespace acl { +constexpr auto kImageHwNum = 2; + +STATUS AclOptionsParser::ParseAclOptions(const mindspore::Context *ctx, AclModelOptionsPtr *const acl_options) { + CHECK_NULL_RETURN(ctx); + CHECK_NULL_RETURN(acl_options); + + auto context = const_cast(ctx); + CHECK_NULL_RETURN(context); + auto device_infos = context->MutableDeviceInfo(); + if (device_infos.size() < 1) { + MS_LOG(WARNING) << "Context is not set device info, please check."; + return lite::RET_OK; + } + CHECK_NULL_RETURN(device_infos[0]); + if (ParseOptions(device_infos[0], acl_options) != lite::RET_OK) { + MS_LOG(ERROR) << "Parse model options failed."; + return lite::RET_ERROR; + } + return lite::RET_OK; +} + +STATUS AclOptionsParser::ParseOptions(const std::shared_ptr &device_info, + AclModelOptions *acl_options) { + auto ascend_info = device_info->Cast(); + if (ascend_info == nullptr) { + MS_LOG(ERROR) << "There is no ascend info."; + return lite::RET_ERROR; + } + int32_t device_id = static_cast(ascend_info->GetDeviceID()); + if (CheckDeviceId(&device_id) != lite::RET_OK) { + MS_LOG(ERROR) << "Check device id failed, device id = " << device_id; + return lite::RET_ERROR; + } + acl_options->device_id = device_id; + return lite::RET_OK; +} + +STATUS AclOptionsParser::CheckDeviceId(int32_t *device_id) { + CHECK_NULL_RETURN(device_id); + uint32_t device_count; + if (aclrtGetDeviceCount(&device_count) != ACL_ERROR_NONE) { + MS_LOG(WARNING) << "Get device count failed."; + return lite::RET_OK; + } + if (*device_id >= static_cast(device_count)) { + MS_LOG(ERROR) << "Current device id " << *device_id << " is larger than max count " << device_count + << ",please check the device info of context."; + return lite::RET_ERROR; + } + 
return lite::RET_OK; +} +} // namespace acl +} // namespace mindspore::kernel diff --git a/mindspore/lite/src/extendrt/kernel/ascend/options/acl_options_parser.h b/mindspore/lite/src/extendrt/kernel/ascend/options/acl_options_parser.h new file mode 100644 index 00000000000..e11e55ced07 --- /dev/null +++ b/mindspore/lite/src/extendrt/kernel/ascend/options/acl_options_parser.h @@ -0,0 +1,41 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef MINDSPORE_LITE_SRC_EXTENDRT_KERNEL_ASCEND_SRC_ACL_OPTIONS_PARSER_H_ +#define MINDSPORE_LITE_SRC_EXTENDRT_KERNEL_ASCEND_SRC_ACL_OPTIONS_PARSER_H_ + +#include +#include +#include "include/api/context.h" +#include "include/errorcode.h" +#include "extendrt/kernel/ascend/options/acl_model_options.h" + +namespace mindspore::kernel { +namespace acl { +using mindspore::lite::STATUS; + +class AclOptionsParser { + public: + STATUS ParseAclOptions(const mindspore::Context *ctx, AclModelOptionsPtr *const acl_options); + + private: + STATUS ParseOptions(const std::shared_ptr &device_info, AclModelOptions *acl_options); + STATUS CheckDeviceId(int32_t *device_id); +}; +} // namespace acl +} // namespace mindspore::kernel + +#endif // MINDSPORE_LITE_SRC_EXTENDRT_KERNEL_ASCEND_SRC_ACL_OPTIONS_PARSER_H_ diff --git a/mindspore/lite/src/extendrt/single_op_session.cc b/mindspore/lite/src/extendrt/single_op_session.cc index e3de3b579de..031e996f761 100644 --- a/mindspore/lite/src/extendrt/single_op_session.cc +++ b/mindspore/lite/src/extendrt/single_op_session.cc @@ -52,16 +52,19 @@ Status SingleOpInferSession::CompileGraph(FuncGraphPtr graph) { for (const auto &kernel_node : kernel_nodes) { mindspore::infer::SetKernelInfo(kernel_node); std::string kernel_name = common::AnfAlgo::GetCNodeName(kernel_node); - std::shared_ptr cpu_kernel_mod = - kernel::Factory::Instance().Create(kernel_name); + std::shared_ptr kernel_mod = kernel::Factory::Instance().Create(kernel_name); MS_LOG(INFO) << "SingleOpInferSession::Kernels " << kernel_name; auto args = kernel::AbstractArgsFromCNode(kernel_node); - auto ret = cpu_kernel_mod->Init(args.op, args.inputs, args.outputs); + if (kernel_mod == nullptr) { + MS_LOG(EXCEPTION) << "Kernel mod is nullptr, kernel name: " << kernel_name; + } + mindspore::infer::CopyInputWeights(kernel_node, args.inputs); + auto ret = kernel_mod->Init(args.op, args.inputs, args.outputs); MS_LOG(INFO) << "SingleOpInferSession::Kernels ret " << ret; if (!ret) { 
MS_LOG(EXCEPTION) << "kernel init failed " << kernel_name; } - if (cpu_kernel_mod->Resize(args.op, args.inputs, args.outputs, kernel::GetKernelDepends(kernel_node)) == + if (kernel_mod->Resize(args.op, args.inputs, args.outputs, kernel::GetKernelDepends(kernel_node)) == kernel::KRET_RESIZE_FAILED) { MS_LOG(EXCEPTION) << "CPU kernel op [" << kernel_node->fullname_with_scope() << "] Resize failed."; } @@ -90,10 +93,10 @@ Status SingleOpInferSession::CompileGraph(FuncGraphPtr graph) { tensor_size = std::max(tensor_size, type_size); (void)output_size_list.emplace_back(tensor_size); } - cpu_kernel_mod->SetInputSizeList(input_size_list); - cpu_kernel_mod->SetOutputSizeList(output_size_list); + kernel_mod->SetInputSizeList(input_size_list); + kernel_mod->SetOutputSizeList(output_size_list); - AnfAlgo::SetKernelMod(cpu_kernel_mod, kernel_node.get()); + AnfAlgo::SetKernelMod(kernel_mod, kernel_node.get()); } this->AssignKernelGraphAddress(kernel_graph_); @@ -284,9 +287,29 @@ device::DeviceAddressPtr SingleOpInferSession::CreateDeviceAddress(void *device_ return std::make_shared(device_ptr, device_size, format, type_id); } +std::vector SingleOpInferSession::GetGraphDataInputs() const { + MS_EXCEPTION_IF_NULL(kernel_graph_); + std::vector data_inputs; + auto inputs = kernel_graph_->inputs(); + for (auto input : inputs) { + if (input->isa()) { + auto parameter = input->cast(); + if (parameter != nullptr && !parameter->has_default()) { + data_inputs.push_back(input); + } + } + } + return data_inputs; +} + void SingleOpInferSession::CopyInputs(const std::vector inputs) { MS_EXCEPTION_IF_NULL(kernel_graph_); - auto graph_inputs = kernel_graph_->inputs(); + auto graph_inputs = GetGraphDataInputs(); + if (graph_inputs.size() != inputs.size()) { + MS_LOG(ERROR) << "Graph inputs size[" << graph_inputs.size() << "] is not equal to User input size[ " + << inputs.size() << "]."; + return; + } for (size_t i = 0; i < graph_inputs.size(); i++) { auto input = inputs[i]; auto graph_input = 
graph_inputs[i]; diff --git a/mindspore/lite/src/extendrt/single_op_session.h b/mindspore/lite/src/extendrt/single_op_session.h index 7a5c6bea86e..4cc6e9f38bc 100644 --- a/mindspore/lite/src/extendrt/single_op_session.h +++ b/mindspore/lite/src/extendrt/single_op_session.h @@ -49,6 +49,7 @@ class SingleOpInferSession : public InferSession { device::DeviceAddressPtr CreateDeviceAddress(void *device_ptr, size_t device_size, const string &format, TypeId type_id) const; void CopyInputs(const std::vector inputs); + std::vector GetGraphDataInputs() const; void CopyOutputs(std::vector *outputs); private: diff --git a/mindspore/lite/src/extendrt/utils/kernel_build_utils.cc b/mindspore/lite/src/extendrt/utils/kernel_build_utils.cc index 0d3a66e5b16..1562185e92d 100644 --- a/mindspore/lite/src/extendrt/utils/kernel_build_utils.cc +++ b/mindspore/lite/src/extendrt/utils/kernel_build_utils.cc @@ -35,6 +35,9 @@ using AnfAlgo = mindspore::session::AnfRuntimeAlgorithm; using mindspore::kernel::KernelBuildInfo; namespace { constexpr auto kParamDynamic = "dynamic"; +constexpr auto kCustomAscendInputNum = 3; +constexpr auto kNameCustomAscend = "CustomAscend"; +constexpr auto kCustomTypeAscend = "acl_build"; bool IsInputNotCNode(const CNodePtr &kernel_node, size_t input_index) { auto input_node = common::AnfAlgo::VisitKernel(kernel_node->input(input_index + 1), 0).first; @@ -326,8 +329,9 @@ void UpdateCustomKernelBuildInfo(const CNodePtr &kernel_node, bool is_akg_op) { GetOutputFormat(kernel_node, &output_formats); builder->SetOutputsDeviceType(output_types); builder->SetOutputsFormat(output_formats); - // AnfAlgo::SetSelectKernelBuildInfo(builder->Build(), kernel_node.get()); - + if (op_name == kNameCustomAscend) { + AnfAlgo::SetSelectKernelBuildInfo(builder->Build(), kernel_node.get()); + } // check reg info if kernel_attr is not null if (kernel_attr != nullptr) { std::vector> kernel_info_list; @@ -465,6 +469,10 @@ std::pair SetKernelInfoWithMsg(const CNodePtr &kerne 
UpdateCustomKernelBuildInfo(kernel_node, true); return {}; } + if (tp == kCustomTypeAscend) { + UpdateCustomKernelBuildInfo(kernel_node, false); + return {}; + } // If Custom op has not set reg info, then infer info from inputs if (mindspore::kernel::OpLib::FindOp(op_name, kernel::OpImplyType::kCPU) == nullptr) { MS_LOG(WARNING) << "Not find operator information for Custom operator[" << op_name << "]. " @@ -535,5 +543,49 @@ void SetKernelInfo(const CNodePtr &kernel_node) { if (msg.empty()) return; MS_EXCEPTION(etype) << msg; } + +void CopyInputWeights(const CNodePtr &kernel_node, const std::vector &inputs) { + std::string kernel_name = common::AnfAlgo::GetCNodeName(kernel_node); + if (kernel_name == kNameCustomAscend) { + auto node_input_size = kernel_node->inputs().size(); + if (node_input_size < kCustomAscendInputNum) { + MS_LOG(ERROR) << "Input num of custom ascend kernel should larger than " << (kCustomAscendInputNum - 1) + << ", real num is " << node_input_size; + return; + } + if (node_input_size != inputs.size() + 1) { + MS_LOG(ERROR) << "Input num of custom ascend kernel [" << node_input_size << "]" + << " is not equal to kernel tensor size[" << (inputs.size() + 1) << "]."; + return; + } + auto om_input = kernel_node->input(node_input_size - 1); + if (!om_input->isa()) { + MS_LOG(ERROR) << "Om input is not parameter."; + return; + } + ParameterPtr om_param = om_input->cast(); + if (om_param == nullptr || !om_param->has_default()) { + MS_LOG(ERROR) << "Om param is invalid, val= " << om_param; + return; + } + auto tensor = std::static_pointer_cast(om_param->default_param()); + if (tensor == nullptr) { + MS_LOG(ERROR) << "Tensor is nullptr."; + return; + } + if (tensor->data_c() == nullptr || tensor->Size() == 0) { + MS_LOG(ERROR) << "Tensor data is invalid."; + return; + } + auto new_addr = malloc(tensor->Size()); + if (new_addr == nullptr) { + MS_LOG(ERROR) << "Malloc failed, size= " << tensor->Size(); + return; + } + memcpy(new_addr, tensor->data_c(), 
tensor->Size()); + kernel::AddressPtr addr_ptr = std::make_shared(new_addr, tensor->Size()); + inputs[inputs.size() - 1]->SetData(addr_ptr); + } +} } // namespace infer } // namespace mindspore diff --git a/mindspore/lite/src/extendrt/utils/kernel_build_utils.h b/mindspore/lite/src/extendrt/utils/kernel_build_utils.h index 003d631bf65..485de9c796e 100644 --- a/mindspore/lite/src/extendrt/utils/kernel_build_utils.h +++ b/mindspore/lite/src/extendrt/utils/kernel_build_utils.h @@ -26,11 +26,13 @@ #include "ir/anf.h" #include "ir/dtype/type.h" #include "include/common/utils/utils.h" +#include "mindspore/ccsrc/kernel/kernel.h" namespace mindspore { namespace infer { using DataType = std::pair; void SetKernelInfo(const CNodePtr &apply_kernel_ptr); +void CopyInputWeights(const CNodePtr &kernel_node, const std::vector &inputs); } // namespace infer } // namespace mindspore diff --git a/mindspore/lite/tools/converter/adapter/acl/src/acl_model_process.cc b/mindspore/lite/tools/converter/adapter/acl/src/acl_model_process.cc index 16006473503..f800ed0d1ed 100644 --- a/mindspore/lite/tools/converter/adapter/acl/src/acl_model_process.cc +++ b/mindspore/lite/tools/converter/adapter/acl/src/acl_model_process.cc @@ -15,7 +15,11 @@ */ #include "tools/converter/adapter/acl/src/acl_model_process.h" +#ifdef ENABLE_CLOUD_FUSION_INFERENCE +#include "src/extendrt/kernel/ascend/model/acl_env_guard.h" +#else #include "src/runtime/kernel/ascend/src/acl_env_guard.h" +#endif #include "src/common/log_util.h" #include "acl/acl.h" #include "acl/acl_rt.h" diff --git a/mindspore/lite/tools/converter/adapter/acl/src/acl_pass_impl.cc b/mindspore/lite/tools/converter/adapter/acl/src/acl_pass_impl.cc index 322462ebe7b..73a01edbd7e 100644 --- a/mindspore/lite/tools/converter/adapter/acl/src/acl_pass_impl.cc +++ b/mindspore/lite/tools/converter/adapter/acl/src/acl_pass_impl.cc @@ -52,6 +52,8 @@ constexpr auto kInferShapePass = "InferShapePass"; constexpr auto kConstFoldPass = "ConstFoldPass"; constexpr 
auto kRemoveRedundantOpPass = "RemoveRedundantOpPass"; constexpr auto kDelRedundantTranspose = "DeleteRedundantTranspose"; +constexpr auto kFuncType = "func_type"; +constexpr auto kUniqueName = "uniq_name"; constexpr size_t kDependInputNum = 3; constexpr size_t kDependFirstInputIdx = 1; constexpr size_t kTupleGetItemFirstInputIdx = 1; @@ -155,6 +157,10 @@ STATUS AclPassImpl::PreProcGraph(const FuncGraphPtr &func_graph) { } STATUS AclPassImpl::PostProcGraph(const FuncGraphPtr &func_graph) { + if (lite::acl::DelRedundantParameter(func_graph) != RET_SUCCESS) { + MS_LOG(ERROR) << "Delete redundant parameters failed."; + return lite::RET_ERROR; + } if (!user_options_cfg_.offline) { MS_LOG(DEBUG) << "Online model infer no need to change to nhwc format."; return lite::RET_OK; @@ -548,6 +554,8 @@ void AclPassImpl::SetCustomAttrs(const std::shared_ptr &prim) { std::vector output_dim_char(output_dim_str.begin(), output_dim_str.end()); std::map> attrs = {{lite::acl::kOutputShapes, output_dim_char}}; prim->set_attr(attrs); + prim->AddAttr(kFuncType, api::MakeValue("acl_build")); + prim->AddAttr(kUniqueName, api::MakeValue("CustomAscend")); } CNodePtr AclPassImpl::CreateCustomNode(const FuncGraphPtr &func_graph) { diff --git a/mindspore/lite/tools/optimizer/format/delete_redundant_transpose.cc b/mindspore/lite/tools/optimizer/format/delete_redundant_transpose.cc index a80d9e2e3d6..90cc55dbe40 100644 --- a/mindspore/lite/tools/optimizer/format/delete_redundant_transpose.cc +++ b/mindspore/lite/tools/optimizer/format/delete_redundant_transpose.cc @@ -136,6 +136,9 @@ STATUS DeleteRedundantTranspose::TransTransFusion(const FuncGraphPtr &func_graph if (!manager_->Replace(cnode, pre_cnode->input(1))) { MS_LOG(ERROR) << "replace old node failed, please check."; return lite::RET_ERROR; + } else { + func_graph->DropNode(cnode->input(kInputIndexTwo)); + func_graph->DropNode(pre_cnode->input(kInputIndexTwo)); } } }