add npu

parent c8dec08070
commit 65014d0988

build.sh: 30 lines changed
diff --git a/build.sh b/build.sh
@@ -23,7 +23,7 @@ usage()
 {
   echo "Usage:"
   echo "bash build.sh [-d] [-r] [-v] [-c on|off] [-t on|off] [-g on|off] [-h] [-b ge] [-m infer|train] \\"
-  echo " [-a on|off] [-p on|off] [-i] [-L] [-R] [-D on|off] [-j[n]] [-e gpu|ascend|cpu] \\"
+  echo " [-a on|off] [-p on|off] [-i] [-L] [-R] [-D on|off] [-j[n]] [-e gpu|ascend|cpu|npu] \\"
   echo " [-P on|off] [-z [on|off]] [-M on|off] [-V 9.2|10.1|310|910] [-I arm64|arm32|x86_64] [-K] \\"
   echo " [-B on|off] [-E] [-l on|off] [-n full|lite|off] [-T on|off] \\"
   echo " [-A [cpp|java|object-c] [-C on|off] [-o on|off] [-S on|off] [-k on|off] [-W sse|neon|avx|off] \\"
@@ -45,7 +45,7 @@ usage()
   echo " -i Enable increment building, default off"
   echo " -L Enable load ANF-IR as input of 'infer', default off"
   echo " -j[n] Set the threads when building (Default: -j8)"
-  echo " -e Use cpu, gpu, ascend"
+  echo " -e Use cpu, gpu, npu or ascend"
   echo " -P Enable dump anf graph to file in ProtoBuffer format, default on"
   echo " -D Enable dumping of function graph ir, default on"
   echo " -z Compile dataset & mindrecord, default on"
@@ -121,7 +121,8 @@ checkopts()
   X86_64_SIMD="off"
   DEVICE_VERSION=""
   DEVICE=""
+  ENABLE_NPU="off"
   # Process the options
   while getopts 'drvj:c:t:hsb:a:g:p:ie:m:l:I:LRP:D:zM:V:K:swB:En:T:A:C:o:S:k:W:' opt
   do
@@ -379,6 +379,9 @@ parse_device()
      usage
      exit 1
    fi
+  elif [[ "X$DEVICE" == "Xnpu" ]]; then
+    ENABLE_NPU="on"
+    ENABLE_CPU="on"
  elif [[ "X$DEVICE" == "Xcpu" ]]; then
    ENABLE_CPU="on"
  elif [[ "X$DEVICE" == "X" ]]; then
@@ -497,6 +500,15 @@ checkndk() {
   fi
 }
 
+checkddk() {
+  if [ "${HWHIAI_DDK}" ]; then
+    echo -e "\e[31mHWHIAI_DDK=$HWHIAI_DDK \e[0m"
+  else
+    echo -e "\e[31mplease set HWHIAI_DDK in environment variable for example: export HWHIAI_DDK=/root/usr/hwhiai-ddk-100.500.010.010/ \e[0m"
+    exit 1
+  fi
+}
+
 gene_flatbuffer() {
   FLAT_DIR="${BASEPATH}/mindspore/lite/schema"
   cd ${FLAT_DIR} && rm -rf "${FLAT_DIR}/inner" && mkdir -p "${FLAT_DIR}/inner"
@@ -612,6 +624,9 @@ build_lite()
     echo "start build opencl"
     build_opencl
   fi
+  if [ "${ENABLE_NPU}" == "on" ]; then
+    checkddk
+  fi
   if [ "${RUN_TESTCASES}" == "on" ]; then
     build_gtest
   fi
@@ -634,7 +649,8 @@ build_lite()
       -DANDROID_STL=${ANDROID_STL} -DCMAKE_BUILD_TYPE=${BUILD_TYPE} -DSUPPORT_TRAIN=${SUPPORT_TRAIN} \
       -DPLATFORM_ARM64=on -DENABLE_NEON=on -DENABLE_FP16="off" \
       -DENABLE_TOOLS=${ENABLE_TOOLS} -DENABLE_CONVERTER=${ENABLE_CONVERTER} -DBUILD_TESTCASES=${RUN_TESTCASES} \
-      -DSUPPORT_GPU=${ENABLE_GPU} -DOFFLINE_COMPILE=${OPENCL_OFFLINE_COMPILE} -DBUILD_MINDDATA=${COMPILE_MINDDATA_LITE} \
+      -DSUPPORT_GPU=${ENABLE_GPU} -DSUPPORT_NPU=${ENABLE_NPU} \
+      -DOFFLINE_COMPILE=${OPENCL_OFFLINE_COMPILE} -DBUILD_MINDDATA=${COMPILE_MINDDATA_LITE} \
       -DCMAKE_INSTALL_PREFIX=${BASEPATH}/output/tmp -DMS_VERSION_MAJOR=${VERSION_MAJOR} \
       -DMS_VERSION_MINOR=${VERSION_MINOR} -DMS_VERSION_REVISION=${VERSION_REVISION} -DENABLE_VERBOSE=${ENABLE_VERBOSE} \
       "${BASEPATH}/mindspore/lite"
@@ -645,14 +661,16 @@ build_lite()
       -DANDROID_STL=${ANDROID_STL} -DCMAKE_BUILD_TYPE=${BUILD_TYPE} \
       -DPLATFORM_ARM32=on -DENABLE_NEON=on -DSUPPORT_TRAIN=${SUPPORT_TRAIN} \
       -DENABLE_TOOLS=${ENABLE_TOOLS} -DENABLE_CONVERTER=${ENABLE_CONVERTER} -DBUILD_TESTCASES=${RUN_TESTCASES} \
-      -DSUPPORT_GPU=${ENABLE_GPU} -DOFFLINE_COMPILE=${OPENCL_OFFLINE_COMPILE} -DBUILD_MINDDATA=${COMPILE_MINDDATA_LITE} \
+      -DSUPPORT_GPU=${ENABLE_GPU} -DSUPPORT_NPU=${ENABLE_NPU} \
+      -DOFFLINE_COMPILE=${OPENCL_OFFLINE_COMPILE} -DBUILD_MINDDATA=${COMPILE_MINDDATA_LITE} \
       -DCMAKE_INSTALL_PREFIX=${BASEPATH}/output/tmp -DMS_VERSION_MAJOR=${VERSION_MAJOR} \
       -DMS_VERSION_MINOR=${VERSION_MINOR} -DMS_VERSION_REVISION=${VERSION_REVISION} -DENABLE_VERBOSE=${ENABLE_VERBOSE} \
       "${BASEPATH}/mindspore/lite"
   else
     cmake -DPLATFORM_ARM64=off -DSUPPORT_TRAIN=${SUPPORT_TRAIN} \
       -DENABLE_TOOLS=${ENABLE_TOOLS} -DENABLE_CONVERTER=${ENABLE_CONVERTER} -DBUILD_TESTCASES=${RUN_TESTCASES} \
-      -DCMAKE_BUILD_TYPE=${BUILD_TYPE} -DSUPPORT_GPU=${ENABLE_GPU} -DBUILD_MINDDATA=${COMPILE_MINDDATA_LITE} \
+      -DCMAKE_BUILD_TYPE=${BUILD_TYPE} -DSUPPORT_GPU=${ENABLE_GPU} -DSUPPORT_NPU=${ENABLE_NPU} \
+      -DBUILD_MINDDATA=${COMPILE_MINDDATA_LITE} \
       -DOFFLINE_COMPILE=${OPENCL_OFFLINE_COMPILE} -DCMAKE_INSTALL_PREFIX=${BASEPATH}/output/tmp \
       -DMS_VERSION_MAJOR=${VERSION_MAJOR} -DMS_VERSION_MINOR=${VERSION_MINOR} -DMS_VERSION_REVISION=${VERSION_REVISION} \
       -DENABLE_VERBOSE=${ENABLE_VERBOSE} -DX86_64_SIMD=${X86_64_SIMD} "${BASEPATH}/mindspore/lite"
diff --git a/cmake/package_lite.cmake b/cmake/package_lite.cmake
@@ -66,6 +66,11 @@ if (BUILD_MINDDATA STREQUAL "lite_cv")
 endif ()
 
 if (PLATFORM_ARM64)
+    if (SUPPORT_NPU)
+        install(FILES ${DDK_LIB_PATH}/libhiai.so DESTINATION ${MAIN_DIR}-${COMPONENT_NAME}/third_party/hiai_ddk/lib COMPONENT ${COMPONENT_NAME})
+        install(FILES ${DDK_LIB_PATH}/libhiai_ir.so DESTINATION ${MAIN_DIR}-${COMPONENT_NAME}/third_party/hiai_ddk/lib COMPONENT ${COMPONENT_NAME})
+        install(FILES ${DDK_LIB_PATH}/libhiai_ir_build.so DESTINATION ${MAIN_DIR}-${COMPONENT_NAME}/third_party/hiai_ddk/lib COMPONENT ${COMPONENT_NAME})
+    endif()
     if (SUPPORT_TRAIN)
         install(DIRECTORY ${TOP_DIR}/mindspore/lite/include/ DESTINATION ${INC_DIR} COMPONENT ${COMPONENT_NAME} FILES_MATCHING PATTERN "*.h")
     else ()
diff --git a/mindspore/lite/CMakeLists.txt b/mindspore/lite/CMakeLists.txt
@@ -17,6 +17,7 @@ option(ENABLE_FP16 "if build fp16 ops" off)
 option(ENABLE_TOOLS "if build tools" on)
 option(BUILD_TESTCASES "if build testcase" on)
 option(SUPPORT_GPU "if support gpu" off)
+option(SUPPORT_NPU "if support npu" off)
 option(OFFLINE_COMPILE "if offline compile OpenCL kernel" off)
 option(BUILD_MINDDATA_EXAMPLE "" on)
 option(ENABLE_VERBOSE "" off)
@@ -39,14 +40,24 @@ if (PLATFORM_ARM64 OR PLATFORM_ARM32)
     set(CMAKE_FIND_ROOT_PATH_MODE_PACKAGE BOTH)
 endif()
 
 if (SUPPORT_GPU)
     set(PROCESS_UNIT gpu)
+elseif (SUPPORT_NPU)
+    set(PROCESS_UNIT npu)
 else ()
     set(PROCESS_UNIT cpu)
 endif ()
 
+if (SUPPORT_NPU)
+    set(DDK_PATH "$ENV{HWHIAI_DDK}/ddk/ai_ddk_lib")
+    if (PLATFORM_ARM64)
+        set(DDK_LIB_PATH ${DDK_PATH}/lib64)
+    elseif (PLATFORM_ARM32)
+        set(DDK_LIB_PATH ${DDK_PATH}/lib)
+    endif()
+    add_compile_definitions(SUPPORT_NPU)
+endif()
+
 add_compile_definitions(NO_DLIB)
 add_compile_options(-fPIC)
@@ -61,7 +72,7 @@ if (SUPPORT_TRAIN)
         set(WIN_RUN_X86_NAME win-runtime-x86-${PROCESS_UNIT})
     else ()
         set(COMPONENT_NAME converter-ubuntu-train)
-    endif()
+    endif ()
     set(RUN_X86_COMPONENT_NAME runtime-x86-${PROCESS_UNIT}-train)
 else ()
     if (PLATFORM_ARM64)
@@ -74,13 +85,13 @@ else ()
         set(WIN_RUN_X86_NAME win-runtime-x86-${PROCESS_UNIT})
     else ()
         set(COMPONENT_NAME converter-ubuntu)
-    endif()
+    endif ()
     if ("${X86_64_SIMD}" STREQUAL "sse")
         set(RUN_X86_COMPONENT_NAME runtime-x86-${X86_64_SIMD}-${PROCESS_UNIT})
     else ()
         set(RUN_X86_COMPONENT_NAME runtime-x86-${PROCESS_UNIT})
     endif ()
-endif()
+endif ()
 
 string(REPLACE "/mindspore/lite" "" TOP_DIR ${CMAKE_CURRENT_SOURCE_DIR})
 set(CORE_DIR ${TOP_DIR}/mindspore/core)
@@ -121,14 +132,14 @@ else ()
     if (NOT WIN32)
         set(CMAKE_SHARED_LINKER_FLAGS "-Wl,-z,relro,-z,now -Wl,-z,noexecstack ${CMAKE_SHARED_LINKER_FLAGS}")
         set(CMAKE_EXE_LINKER_FLAGS "-Wl,-z,relro,-z,now -Wl,-z,noexecstack ${CMAKE_EXE_LINKER_FLAGS}")
-    endif()
+    endif ()
 endif ()
 if (ENABLE_VERBOSE)
     set(CMAKE_VERBOSE_MAKEFILE on)
 endif ()
 if (SUPPORT_TRAIN)
     add_compile_definitions(SUPPORT_TRAIN)
-endif()
+endif ()
 if (ENABLE_NEON)
     add_compile_definitions(ENABLE_NEON)
 endif ()
@@ -155,7 +166,7 @@ endif ()
 if (ENABLE_CONVERTER)
     if (PLATFORM_ARM)
         MESSAGE(FATAL_ERROR "Cannot build converter in arm platform")
-    endif()
+    endif ()
     include_directories(${PYTHON_INCLUDE_DIRS})
     include(${TOP_DIR}/cmake/external_libs/eigen.cmake)
     include(${TOP_DIR}/cmake/external_libs/protobuf.cmake)
@@ -207,18 +218,18 @@ if (ENABLE_TOOLS)
     add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/tools/benchmark)
     if (SUPPORT_TRAIN)
         add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/tools/net_train)
-    endif()
-endif()
+    endif ()
+endif ()
 if (NOT WIN32)
     if (ENABLE_TOOLS)
         if (NOT PLATFORM_ARM32 AND NOT PLATFORM_ARM64)
             add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/tools/schema_gen)
             add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/tools/lib_cropper)
         endif ()
-    endif()
+    endif ()
     if (BUILD_TESTCASES)
         add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/test)
-    endif()
+    endif ()
 endif ()
 
 include(${TOP_DIR}/cmake/package_lite.cmake)
diff --git a/mindspore/lite/include/context.h b/mindspore/lite/include/context.h
@@ -48,10 +48,16 @@ typedef struct {
   bool enable_float16_ = false; /**< prior enable float16 inference */
 } GpuDeviceInfo;
 
+/// \brief NpuDeviceInfo defined for NPU's configuration information.
+typedef struct {
+  int frequency_ = 3; /**< npu frequency inference */
+} NpuDeviceInfo;
+
 /// \brief DeviceInfo defined for backend's configuration information.
 union DeviceInfo {
   CpuDeviceInfo cpu_device_info_;
   GpuDeviceInfo gpu_device_info_;
+  NpuDeviceInfo npu_device_info_;
 };
 
 /// \brief DeviceContext defined for holding backend's configuration information.
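Note: with this change a caller opts into NPU inference by pushing a DT_NPU entry onto the context's device list. A minimal sketch of that usage, assuming a device whose chip and DDK actually support NPU (illustrative only, not part of this commit):

    #include "include/context.h"
    #include "include/lite_session.h"

    // Sketch: create a session that prefers the NPU backend.
    mindspore::session::LiteSession *CreateNpuSession() {
      mindspore::lite::Context context;
      mindspore::lite::DeviceContext npu_ctx;
      npu_ctx.device_type_ = mindspore::lite::DT_NPU;
      npu_ctx.device_info_.npu_device_info_ = {3};  // frequency_; 3 is the default
      context.device_list_.push_back(npu_ctx);
      return mindspore::session::LiteSession::CreateSession(&context);
    }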
diff --git a/mindspore/lite/src/CMakeLists.txt b/mindspore/lite/src/CMakeLists.txt
@@ -88,6 +88,12 @@ else ()
     target_link_libraries(mindspore-lite cpu_kernel_mid nnacl cpu_ops_mid)
     target_link_libraries(mindspore-lite_static cpu_kernel_mid nnacl_mid cpu_ops_mid)
 endif ()
+if (SUPPORT_NPU)
+    add_subdirectory(runtime/agent/npu)
+    include_directories(${DDK_PATH})
+    target_link_libraries(mindspore-lite npu_kernel_mid)
+    target_link_libraries(mindspore-lite_static npu_kernel_mid)
+endif ()
 if (PLATFORM_ARM32 OR PLATFORM_ARM64)
     target_link_libraries(mindspore-lite log)
     target_link_libraries(mindspore-lite_static log)
diff --git a/mindspore/lite/src/inner_context.cc b/mindspore/lite/src/inner_context.cc
@@ -17,6 +17,9 @@
 #include "src/inner_context.h"
 #include "include/errorcode.h"
 #include "src/common/log_adapter.h"
+#ifdef SUPPORT_NPU
+#include "src/runtime/agent/npu/npu_manager.h"
+#endif
 
 namespace mindspore::lite {
 InnerContext::InnerContext(const Context *context) {
@@ -74,10 +77,12 @@ int InnerContext::IsValid() {
     return RET_NOT_SUPPORT;
   }
 #endif
+#ifndef SUPPORT_NPU
   if (IsNpuEnabled()) {
     MS_LOG(ERROR) << "NPU is not supported.";
     return RET_NOT_SUPPORT;
   }
+#endif
   return RET_OK;
 }
@@ -108,9 +113,14 @@ bool InnerContext::IsGpuEnabled() {
 }
 
 bool InnerContext::IsNpuEnabled() {
+#ifdef SUPPORT_NPU
   return this->device_list_.end() !=
            std::find_if(this->device_list_.begin(), this->device_list_.end(),
-                        [](const DeviceContext &device) { return device.device_type_ == DT_NPU; });
+                        [](const DeviceContext &device) { return device.device_type_ == DT_NPU; }) &&
+         mindspore::lite::NPUManager::GetInstance()->IsSupportNPU();
+#else
+  return false;
+#endif
 }
 
 CpuDeviceInfo InnerContext::GetCpuInfo() {
@@ -132,4 +142,15 @@ GpuDeviceInfo InnerContext::GetGpuInfo() {
     return iter->device_info_.gpu_device_info_;
   }
 }
+
+NpuDeviceInfo InnerContext::GetNpuInfo() const {
+  auto iter = std::find_if(this->device_list_.begin(), this->device_list_.end(),
+                           [](const DeviceContext &device) { return device.device_type_ == DT_NPU; });
+  if (iter == this->device_list_.end()) {
+    return {};
+  } else {
+    return iter->device_info_.npu_device_info_;
+  }
+}
+
 }  // namespace mindspore::lite
diff --git a/mindspore/lite/src/inner_context.h b/mindspore/lite/src/inner_context.h
@@ -47,6 +47,8 @@ struct InnerContext : public Context {
 
   GpuDeviceInfo GetGpuInfo();
 
+  NpuDeviceInfo GetNpuInfo() const;
+
   int IsValid();
 
   virtual ~InnerContext();
diff --git a/mindspore/lite/src/lite_session.cc b/mindspore/lite/src/lite_session.cc
@@ -27,7 +27,10 @@
 #include "src/common/graph_util.h"
 #include "src/kernel_registry.h"
 #include "src/model_common.h"
-#include "mindspore/lite/src/runtime/kernel/arm/base/dequant.h"
+#include "src/runtime/kernel/arm/base/dequant.h"
+#if SUPPORT_NPU
+#include "src/runtime/agent/npu/npu_manager.h"
+#endif
 
 namespace mindspore {
 namespace lite {
@@ -330,6 +333,14 @@ int LiteSession::CompileGraph(Model *model) {
     is_running_.store(false);
     return ret;
   }
+#if SUPPORT_NPU
+  if (this->context_->IsNpuEnabled()) {
+    if (mindspore::lite::NPUManager::GetInstance()->LoadOMModel() != RET_OK) {
+      MS_LOG(ERROR) << "NPU client load model failed.";
+      return RET_ERROR;
+    }
+  }
+#endif
   ret = executor_->Prepare(this->kernels_);
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "Prepare executor failed: " << ret;
@@ -410,19 +421,18 @@ int LiteSession::Init(const Context *context) {
     is_running_.store(false);
     return ret;
   }
-#if SUPPORT_GPU
-  if (this->context_->IsGpuEnabled()) {
-    auto gpu_device_info = this->context_->GetGpuInfo();
-    auto opencl_runtime = ocl_runtime_wrap_.GetInstance();
-    opencl_runtime->SetFp16Enable(gpu_device_info.enable_float16_);
-    if (opencl_runtime->Init() != RET_OK) {
-      this->context_->device_list_ = {{DT_CPU, {gpu_device_info.enable_float16_, MID_CPU}}};
-      MS_LOG(WARNING) << "Init OpenCL runtime failed, change to CPU mode.";
-    } else {
-      MS_LOG(INFO) << "Init OpenCL runtime success.";
-    }
+  ret = InitGPURuntime();
+  if (ret != RET_OK) {
+    MS_LOG(ERROR) << "Init GPU runtime failed.";
+    is_running_.store(false);
+    return ret;
+  }
+  ret = InitNPURuntime();
+  if (ret != RET_OK) {
+    MS_LOG(ERROR) << "Init NPU runtime failed.";
+    is_running_.store(false);
+    return ret;
   }
-#endif
   executor_ = new (std::nothrow) Executor();
   if (nullptr == executor_) {
     MS_LOG(ERROR) << "New Executor failed";
@@ -573,6 +583,35 @@ int LiteSession::Resize(const std::vector<mindspore::tensor::MSTensor *> &inputs
   is_running_.store(false);
   return RET_OK;
 }
+
+int LiteSession::InitNPURuntime() {
+#if SUPPORT_NPU
+  if (this->context_->IsNpuEnabled()) {
+    if (mindspore::lite::NPUManager::GetInstance()->InitClient() != RET_OK) {
+      MS_LOG(ERROR) << "NPU client init error.";
+      return RET_ERROR;
+    }
+  }
+#endif
+  return RET_OK;
+}
+
+int LiteSession::InitGPURuntime() {
+#if SUPPORT_GPU
+  if (this->context_->IsGpuEnabled()) {
+    auto gpu_device_info = this->context_->GetGpuInfo();
+    auto opencl_runtime = ocl_runtime_wrap_.GetInstance();
+    opencl_runtime->SetFp16Enable(gpu_device_info.enable_float16_);
+    if (opencl_runtime->Init() != RET_OK) {
+      this->context_->device_list_ = {{DT_CPU, {gpu_device_info.enable_float16_, MID_CPU}}};
+      MS_LOG(WARNING) << "Init OpenCL runtime failed, change to CPU mode.";
+    } else {
+      MS_LOG(INFO) << "Init OpenCL runtime success.";
+    }
+  }
+#endif
+  return RET_OK;
+}
 }  // namespace lite
 
 session::LiteSession *session::LiteSession::CreateSession(const lite::Context *context) {
diff --git a/mindspore/lite/src/lite_session.h b/mindspore/lite/src/lite_session.h
@@ -96,6 +96,10 @@ class LiteSession : public session::LiteSession {
  private:
  void ResetInputsShape(const std::vector<std::vector<int>> &dims);
 
+  int InitNPURuntime();
+
+  int InitGPURuntime();
+
 protected:
  InnerContext *context_ = nullptr;
  std::vector<kernel::LiteKernel *> kernels_;
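Note: with the session changes above, end-to-end usage is unchanged for callers; Init() funnels backend setup through the two new private helpers, and CompileGraph() loads the generated OM models. A sketch of the flow (illustrative only; model_buf/size are assumed to hold a serialized .ms model and context to be configured as shown earlier):

    // Sketch: the NPU path is exercised transparently by the session.
    auto *model = mindspore::lite::Model::Import(model_buf, size);
    auto *session = mindspore::session::LiteSession::CreateSession(&context);
    // CreateSession -> Init() -> InitGPURuntime()/InitNPURuntime();
    // CompileGraph() -> NPUManager::LoadOMModel() when NPU is enabled.
    int ret = session->CompileGraph(model);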
diff --git a/mindspore/lite/src/runtime/agent/npu/CMakeLists.txt b/mindspore/lite/src/runtime/agent/npu/CMakeLists.txt
new file
@@ -0,0 +1,23 @@
+include_directories(${DDK_PATH})
+include_directories(${CMAKE_CURRENT_SOURCE_DIR}/)
+include_directories(${CMAKE_CURRENT_SOURCE_DIR}/kernel)
+file(GLOB_RECURSE NPU_RUNTIME_SRC
+    ${CMAKE_CURRENT_SOURCE_DIR}/*.cc
+    ${CMAKE_CURRENT_SOURCE_DIR}/../../kernel/npu/*.cc
+    )
+add_library(hiai SHARED IMPORTED)
+set_target_properties(hiai PROPERTIES IMPORTED_LOCATION
+    ${DDK_LIB_PATH}/libhiai.so)
+add_library(hiai_ir SHARED IMPORTED)
+set_target_properties(hiai_ir PROPERTIES IMPORTED_LOCATION
+    ${DDK_LIB_PATH}/libhiai_ir.so)
+add_library(hiai_ir_build SHARED IMPORTED)
+set_target_properties(hiai_ir_build PROPERTIES IMPORTED_LOCATION
+    ${DDK_LIB_PATH}/libhiai_ir_build.so)
+add_library(npu_kernel_mid OBJECT ${NPU_RUNTIME_SRC})
+target_link_libraries(
+    npu_kernel_mid
+    hiai
+    hiai_ir
+    hiai_ir_build
+    )
diff --git a/mindspore/lite/src/runtime/agent/npu/npu_converter_utils.cc b/mindspore/lite/src/runtime/agent/npu/npu_converter_utils.cc
new file
@@ -0,0 +1,160 @@
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "src/runtime/agent/npu/npu_converter_utils.h"
+namespace mindspore::lite {
+ge::Shape ConverterToNPUShape(const std::vector<int> &src_shape) {
+  vector<int64_t> shapes;
+  shapes.reserve(src_shape.size());
+  for (int i = 0; i < src_shape.size(); i++) {
+    shapes.push_back(src_shape[i]);
+  }
+  return ge::Shape({shapes});
+}
+ge::Format ConverterToNPUFormat(schema::Format format) {
+  ge::Format ge_format;
+  switch (format) {
+    case schema::Format_NCHW:
+      ge_format = ge::FORMAT_NCHW;
+      break;
+    case schema::Format_NHWC:
+      ge_format = ge::FORMAT_NHWC;
+      break;
+    default:
+      MS_LOG(ERROR) << "Unsupported format:" << format;
+      // use unused format to indicate errors.
+      ge_format = ge::FORMAT_ND;
+      break;
+  }
+  return ge_format;
+}
+
+ge::DataType ConverterToNPUDataType(TypeId type_id) {
+  ge::DataType data_type;
+  switch (type_id) {
+    case kNumberTypeFloat:
+    case kNumberTypeFloat32:
+      data_type = ge::DT_FLOAT;
+      break;
+    case kNumberTypeFloat16:
+      data_type = ge::DT_FLOAT16;
+      break;
+    case kNumberTypeInt8:
+      data_type = ge::DT_INT8;
+      break;
+    case kNumberTypeUInt8:
+      data_type = ge::DT_UINT8;
+      break;
+    case kNumberTypeInt16:
+      data_type = ge::DT_INT16;
+      break;
+    case kNumberTypeInt32:
+      data_type = ge::DT_INT32;
+      break;
+    case kNumberTypeUInt32:
+      data_type = ge::DT_UINT32;
+      break;
+    default:
+      data_type = ge::DT_UNDEFINED;
+      break;
+  }
+  return data_type;
+}
+hiai::op::Data *ConverterToNPUData(Tensor *src, const std::string &name) {
+  auto data = new (std::nothrow) hiai::op::Data(name);
+  if (data == nullptr) {
+    MS_LOG(ERROR) << "new data failed.";
+    return data;
+  }
+  ge::TensorDesc tensor_desc(ConverterToNPUShape(src->shape()), ConverterToNPUFormat(src->format()),
+                             ConverterToNPUDataType(src->data_type()));
+  data->update_input_desc_x(tensor_desc);
+  return data;
+}
+
+std::shared_ptr<ge::Tensor> ConverterToNPUTensor(Tensor *src) {
+  std::shared_ptr<ge::Tensor> ge_tensor = std::shared_ptr<ge::Tensor>(new (std::nothrow) ge::Tensor());
+  if (ge_tensor == nullptr) {
+    MS_LOG(ERROR) << "new ge_tensor failed.";
+    return ge_tensor;
+  }
+  ge::TensorDesc tensor_desc(ConverterToNPUShape(src->shape()), ConverterToNPUFormat(src->format()),
+                             ConverterToNPUDataType(src->data_type()));
+
+  ge_tensor->SetTensorDesc(tensor_desc);
+
+  if (src->data_c() != nullptr) {
+    ge_tensor->SetData(reinterpret_cast<const uint8_t *>(src->data_c()), src->Size());
+  }
+  return ge_tensor;
+}
+/*
+ * mode : Activation mode, with options as follows:
+ *       0 : Sigmoid
+ *       1 : ReLU
+ *       2 : Tanh
+ *       3 : Clipped ReLU
+ *       4 : ELU
+ *       5 : PReLU
+ *       6 : Abs
+ *       7 : Relu1
+ *       8 : Softsign
+ *       9 : Softplus
+ *       10 : Hardsigmoid
+ *       11 : Threshold ReLU
+ *       12 : Selu
+ *       13 : Linear
+ *       14 : Relu6
+ *       15 : GeLU.
+ */
+int ConverterToNPUActMode(schema::ActivationType type) {
+  switch (type) {
+    case schema::ActivationType_NO_ACTIVATION:
+      return -1;
+    case schema::ActivationType_SIGMOID:
+      return 0;
+    case schema::ActivationType_RELU:
+      return 1;
+    case schema::ActivationType_TANH:
+      return 2;
+    case schema::ActivationType_ELU:
+      return 4;
+    case schema::ActivationType_LEAKY_RELU:
+      return 5;
+    case schema::ActivationType_ABS:
+      return 6;
+    case schema::ActivationType_RELU1:
+      return 7;
+    case schema::ActivationType_SOFTSIGN:
+      return 8;
+    case schema::ActivationType_SOFTPLUS:
+      return 9;
+    case schema::ActivationType_HSIGMOID:
+      return 10;
+    case schema::ActivationType_THRESHOLDRELU:
+      return 11;
+    case schema::ActivationType_SELU:
+      return 12;
+    case schema::ActivationType_LINEAR:
+      return 13;
+    case schema::ActivationType_RELU6:
+      return 14;
+    default:
+      MS_LOG(ERROR) << "Unsupport activation type to NPU." << type;
+      return -1;
+  }
+}
+}  // namespace mindspore::lite
diff --git a/mindspore/lite/src/runtime/agent/npu/npu_converter_utils.h b/mindspore/lite/src/runtime/agent/npu/npu_converter_utils.h
new file
@@ -0,0 +1,42 @@
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_NPU_CONVERTER_UITLS_H_
+#define MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_NPU_CONVERTER_UITLS_H_
+#include <string>
+#include <memory>
+#include <vector>
+#include "schema/ops_generated.h"
+#include "include/graph/tensor.h"
+#include "include/graph/op/array_defs.h"
+#include "src/tensor.h"
+
+namespace mindspore::lite {
+
+std::shared_ptr<ge::Tensor> ConverterToNPUTensor(Tensor *src);
+
+hiai::op::Data *ConverterToNPUData(Tensor *src, const std::string &name);
+
+ge::Format ConverterToNPUFormat(schema::Format format);
+
+ge::DataType ConverterToNPUDataType(TypeId type_id);
+
+ge::Shape ConverterToNPUShape(const std::vector<int> &src_shape);
+
+int ConverterToNPUActMode(schema::ActivationType type);
+
+}  // namespace mindspore::lite
+#endif  // MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_NPU_CONVERTER_UITLS_H_
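Note: these helpers translate lite tensor metadata (shape, format, dtype, raw data) into HiAI IR types so subgraphs can be rebuilt as OM models. A small sketch of the intended call pattern (hypothetical wrapper, not part of this commit):

    #include "src/runtime/agent/npu/npu_converter_utils.h"

    // Hypothetical helper: build a named HiAI input op from a lite tensor;
    // ConverterToNPUData copies shape/format/dtype into the op's TensorDesc.
    hiai::op::Data *MakeGraphInput(mindspore::lite::Tensor *t, const std::string &name) {
      return mindspore::lite::ConverterToNPUData(t, name);
    }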
diff --git a/mindspore/lite/src/runtime/agent/npu/npu_executor.cc b/mindspore/lite/src/runtime/agent/npu/npu_executor.cc
new file
@@ -0,0 +1,131 @@
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "src/runtime/agent/npu/npu_executor.h"
+#include "include/errorcode.h"
+#include "src/runtime/agent/npu/npu_manager.h"
+
+namespace mindspore::lite {
+int NPUExecutor::Prepare(const std::vector<kernel::LiteKernel *> &kernels) {
+  this->client_ = mindspore::lite::NPUManager::GetInstance()->GetClient();
+  if (this->client_ == nullptr) {
+    MS_LOG(ERROR) << "client is nullptr.";
+    return RET_ERROR;
+  }
+  if (GetIOTensorVec() != RET_OK) {
+    MS_LOG(ERROR) << "Load model failed.";
+    return RET_ERROR;
+  }
+  return RET_OK;
+}
+
+int NPUExecutor::Run(std::vector<Tensor *> &in_tensors, std::vector<Tensor *> &out_tensors,
+                     std::vector<kernel::LiteKernel *> &kernels, Allocator *allocator, const KernelCallBack &before,
+                     const KernelCallBack &after) {
+  hiai::AiContext context;
+  for (int i = 0; i < npu_input_tensors_.size(); ++i) {
+    memcpy(npu_input_tensors_[i]->GetBuffer(), in_tensors[i]->data_c(), in_tensors[i]->Size());
+  }
+  context.AddPara("model_name", model_name_);
+  if (this->client_ == nullptr) {
+    MS_LOG(ERROR) << "NPU client is nullptr";
+    return RET_ERROR;
+  }
+  int stamp;
+  int ret = this->client_->Process(context, this->npu_input_tensors_, this->npu_output_tensors_, 1000, stamp);
+  if (ret != hiai::AI_SUCCESS) {
+    MS_LOG(ERROR) << "NPU Process failed. code is " << ret;
+    return RET_ERROR;
+  }
+
+  for (int i = 0; i < npu_output_tensors_.size(); ++i) {
+    memcpy(out_tensors[i]->MutableData(), npu_output_tensors_[i]->GetBuffer(), npu_output_tensors_[i]->GetSize());
+  }
+
+  return RET_OK;
+}
+
+int NPUExecutor::GetIOTensorVec() {
+  std::vector<hiai::TensorDimension> input_dimension;
+  std::vector<hiai::TensorDimension> output_dimension;
+  input_dimension.clear();
+  output_dimension.clear();
+  if (this->client_ == nullptr) {
+    MS_LOG(ERROR) << "client is nullptr.";
+    return RET_ERROR;
+  }
+  auto ret = this->client_->GetModelIOTensorDim(model_name_, input_dimension, output_dimension);
+  if (ret != hiai::AI_SUCCESS) {
+    MS_LOG(ERROR) << "Get model input and output tensor dims failed." << ret;
+    return RET_ERROR;
+  }
+  ret = UpdateInputTensorVec(input_dimension);
+  if (ret != RET_OK) {
+    MS_LOG(ERROR) << "Update input tensor vector failed. " << ret;
+    return RET_ERROR;
+  }
+  ret = UpdateOutputTensorVec(output_dimension);
+  if (ret != RET_OK) {
+    MS_LOG(ERROR) << "Update output tensor vector failed. " << ret;
+    return RET_ERROR;
+  }
+  return RET_OK;
+}
+
+int NPUExecutor::UpdateInputTensorVec(const std::vector<hiai::TensorDimension> &input_dimension) {
+  if (input_dimension.empty()) {
+    MS_LOG(ERROR) << "npu input tensor dimension is empty.";
+    return RET_ERROR;
+  }
+  npu_input_tensors_.resize(input_dimension.size());
+  npu_input_tensors_.clear();
+  for (const auto &inDim : input_dimension) {
+    std::shared_ptr<hiai::AiTensor> input = std::make_shared<hiai::AiTensor>();
+    if (input->Init(&inDim) != hiai::AI_SUCCESS) {
+      MS_LOG(ERROR) << "Input AiTensor init failed.";
+      return RET_ERROR;
+    }
+    npu_input_tensors_.push_back(input);
+  }
+  if (npu_input_tensors_.empty()) {
+    MS_LOG(ERROR) << "NPU input tensor is empty.";
+    return RET_ERROR;
+  }
+  return RET_OK;
+}
+
+int NPUExecutor::UpdateOutputTensorVec(const std::vector<hiai::TensorDimension> &output_dimension) {
+  if (output_dimension.empty()) {
+    MS_LOG(ERROR) << "output_dimension_ is empty.";
+    return RET_ERROR;
+  }
+  npu_output_tensors_.resize(output_dimension.size());
+  npu_output_tensors_.clear();
+  for (const auto &outDim : output_dimension) {
+    std::shared_ptr<hiai::AiTensor> output = std::make_shared<hiai::AiTensor>();
+    int ret = output->Init(&outDim);
+    if (ret != hiai::AI_SUCCESS) {
+      return RET_ERROR;
+    }
+    npu_output_tensors_.push_back(output);
+  }
+  if (npu_output_tensors_.empty()) {
+    MS_LOG(ERROR) << "NPU output tensor is empty.";
+    return RET_ERROR;
+  }
+  return RET_OK;
+}
+}  // namespace mindspore::lite
diff --git a/mindspore/lite/src/runtime/agent/npu/npu_executor.h b/mindspore/lite/src/runtime/agent/npu/npu_executor.h
new file
@@ -0,0 +1,52 @@
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_NPU_EXECUTOR_H_
+#define MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_NPU_EXECUTOR_H_
+#include <string>
+#include <memory>
+#include <utility>
+#include <vector>
+#include "src/executor.h"
+#include "include/errorcode.h"
+#include "include/HiAiModelManagerService.h"
+
+namespace mindspore::lite {
+class NPUExecutor : public Executor {
+ public:
+  explicit NPUExecutor(const std::string &model_name) { this->model_name_ = model_name; }
+  ~NPUExecutor() override = default;
+  int Prepare(const std::vector<kernel::LiteKernel *> &kernels) override;
+
+  int Run(std::vector<Tensor *> &in_tensors, std::vector<Tensor *> &out_tensors,
+          std::vector<kernel::LiteKernel *> &kernels, Allocator *allocator = nullptr,
+          const KernelCallBack &before = nullptr, const KernelCallBack &after = nullptr) override;
+
+ private:
+  int GetIOTensorVec();
+
+  int UpdateInputTensorVec(const std::vector<hiai::TensorDimension> &input_dimension);
+
+  int UpdateOutputTensorVec(const std::vector<hiai::TensorDimension> &output_dimension);
+
+ private:
+  std::string model_name_;
+  std::shared_ptr<hiai::AiModelMngerClient> client_ = nullptr;
+  std::vector<std::shared_ptr<hiai::AiTensor>> npu_input_tensors_;
+  std::vector<std::shared_ptr<hiai::AiTensor>> npu_output_tensors_;
+};
+}  // namespace mindspore::lite
+#endif  // MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_NPU_EXECUTOR_H_
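Note: NPUExecutor mirrors the base Executor interface so the session can drive CPU, GPU and NPU subgraphs uniformly: Prepare() binds the shared HiAI client and resolves I/O tensor shapes, while Run() copies inputs into HiAI buffers, calls Process(), and copies the outputs back. A usage sketch (illustrative only; kernels/in_tensors/out_tensors are assumed to come from the scheduled subgraph):

    // Sketch: how a subgraph kernel drives the executor.
    auto *executor = new (std::nothrow) mindspore::lite::NPUExecutor("subgraph0.om");
    if (executor != nullptr && executor->Prepare(kernels) == mindspore::lite::RET_OK) {
      executor->Run(in_tensors, out_tensors, kernels, nullptr);
    }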
diff --git a/mindspore/lite/src/runtime/agent/npu/npu_manager.cc b/mindspore/lite/src/runtime/agent/npu/npu_manager.cc
new file
@@ -0,0 +1,217 @@
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "src/runtime/agent/npu/npu_manager.h"
+#include <sys/fcntl.h>
+#include <unistd.h>
+#include "include/hiai_ir_build.h"
+#include "include/HiAiModelManagerService.h"
+#include "include/errorcode.h"
+#include "include/graph/op/all_ops.h"
+#include "src/common/file_utils.h"
+
+namespace mindspore::lite {
+
+bool NPUManager::IsSupportNPU() {
+  if (!is_npu_check_executor) {
+    CheckSupportNPU();
+  }
+  if (is_support_npu) {
+    MS_LOG(INFO) << "The current device support NPU.";
+    return true;
+  } else {
+    MS_LOG(INFO) << "The current device NOT SUPPORT NPU.";
+    return false;
+  }
+}
+
+std::string NPUManager::GetExecutorPath() {
+  std::string executor_path;
+  char cmdline[1024] = {0};
+  int fd = open("/proc/self/cmdline", O_RDONLY);
+  if (fd >= 0) {
+    char ch;
+    int i = 0;
+    while (read(fd, &ch, sizeof(ch)) > 0 && !isspace(ch)) {
+      if (':' == ch) {
+        break;
+      }
+      cmdline[i] = ch;
+      i++;
+    }
+    close(fd);
+  }
+  executor_path = std::string(cmdline);
+  if (executor_path.empty()) {
+    executor_path = "./";
+  }
+  // android
+  if (executor_path.substr(0, 11) == "/data/data/") {
+    executor_path = executor_path + '/';
+  } else {
+    // Linux
+    executor_path = executor_path.substr(0, executor_path.rfind('/')) + "/";
+  }
+  return executor_path;
+}
+
+bool NPUManager::IsKirinChip() {
+  std::ifstream cpu_info("/proc/cpuinfo");
+  if (!(cpu_info.good() && cpu_info.is_open())) {
+    return false;
+  }
+  std::string line;
+  while (!cpu_info.eof()) {
+    getline(cpu_info, line);
+    if (line.find("Hardware") == string::npos) {
+      continue;
+    }
+    auto index = line.find("Kirin");
+    if (index == string::npos) {
+      continue;
+    }
+    auto kirin_number_str = line.substr(index + 5);
+    auto kirin_number = atoi(kirin_number_str.c_str());
+    if (kirin_number >= 985 || kirin_number == 810 || kirin_number == 820) {
+      cpu_info.close();
+      return true;
+    } else {
+      cpu_info.close();
+      return false;
+    }
+  }
+  return false;
+}
+
+bool WriteToOMFile(domi::ModelBufferData om_model_buff, const std::string &om_file_path) {
+  FILE *fp;
+  fp = fopen(om_file_path.c_str(), "wb");
+  if (fp == nullptr) {
+    MS_LOG(ERROR) << om_file_path.c_str() << " open failed.";
+    return false;
+  }
+
+  auto write_size = (uint32_t)fwrite(om_model_buff.data, 1, om_model_buff.length, fp);
+  if (write_size != om_model_buff.length) {
+    fclose(fp);
+    MS_LOG(ERROR) << "Write om file failed.";
+    return false;
+  }
+  fclose(fp);
+  return true;
+}
+
+bool NPUManager::CheckOmBuildIr(const std::string &path) {
+  // build test om model
+  std::shared_ptr<hiai::op::Add> add_op(new (std::nothrow) hiai::op::Add("add"));
+  if (add_op == nullptr) {
+    MS_LOG(ERROR) << "new add_op failed.";
+    return false;
+  }
+  ge::TensorDesc desc(ge::Shape({1}), ge::FORMAT_NCHW, ge::DT_FLOAT);
+  std::shared_ptr<hiai::op::Data> data = std::make_shared<hiai::op::Data>("data");
+  data->update_input_desc_x(desc);
+  add_op->set_input_x1(*data);
+  add_op->set_input_x2(*data);
+  domi::HiaiIrBuild ir_build;
+  ge::Graph ir_graph("graph");
+  std::vector<ge::Operator> inputs{*data, *data};
+  std::vector<ge::Operator> outputs{*add_op};
+  ir_graph.SetInputs(inputs).SetOutputs(outputs);
+  ge::Model om_model("test_model", "test_version");
+  om_model.SetGraph(ir_graph);
+
+  domi::ModelBufferData om_model_buff;
+  if (!ir_build.CreateModelBuff(om_model, om_model_buff)) {
+    MS_LOG(ERROR) << "Create model buffer failed.";
+    return false;
+  }
+  if (!ir_build.BuildIRModel(om_model, om_model_buff)) {
+    MS_LOG(ERROR) << "Build IR model failed.";
+    return false;
+  }
+
+  // save test om model
+  remove(path.c_str());
+  bool ret = WriteToOMFile(om_model_buff, path);
+  ir_build.ReleaseModelBuff(om_model_buff);
+  return ret;
+}
+
+void NPUManager::CheckSupportNPU() {
+  is_npu_check_executor = true;
+  std::string path_string = GetExecutorPath();
+
+  std::string test_model_path = path_string + "/mindspore_lite_test_npu.om";
+  std::ifstream ifs(test_model_path);
+  if (ifs.good() && ifs.is_open()) {
+    ifs.close();
+    is_support_npu = true;
+    return;
+  }
+  if (!IsKirinChip()) {
+    MS_LOG(ERROR) << "The current device chip NOT SUPPORT NPU";
+    is_support_npu = false;
+    return;
+  }
+
+  if (!CheckOmBuildIr(test_model_path)) {
+    MS_LOG(ERROR) << "Build OM IR error.";
+    is_support_npu = false;
+    return;
+  }
+  is_support_npu = true;
+}
+
+int NPUManager::AddModel(void *model_buf, uint32_t size, const std::string &model_name, int frequency) {
+  hiai::MemBuffer *buffer = mc_builder_->InputMemBufferCreate(model_buf, size);
+  if (buffer == nullptr) {
+    MS_LOG(ERROR) << "MemBuffer is null.";
+    return RET_ERROR;
+  }
+
+  auto desc = std::make_shared<hiai::AiModelDescription>(model_name, frequency, 0, 0, 0);
+  desc->SetModelBuffer(buffer->GetMemBufferData(), buffer->GetMemBufferSize());
+  model_desc_.push_back(desc);
+  mc_builder_->MemBufferDestroy(buffer);
+
+  return RET_OK;
+}
+
+int NPUManager::InitClient() {
+  this->client_ = std::make_shared<hiai::AiModelMngerClient>();
+  if (this->client_ == nullptr) {
+    return RET_ERROR;
+  }
+  int ret = this->client_->Init(nullptr);
+  if (ret != hiai::AI_SUCCESS) {
+    return RET_ERROR;
+  }
+  mc_builder_ = std::make_shared<hiai::AiModelBuilder>(this->client_);
+  return RET_OK;
+}
+
+int NPUManager::LoadOMModel() {
+  int ret = this->client_->Load(model_desc_);
+  if (ret != hiai::AI_SUCCESS) {
+    MS_LOG(ERROR) << "Client load model failed." << ret;
+    return RET_ERROR;
+  }
+  return RET_OK;
+}
+
+std::shared_ptr<hiai::AiModelMngerClient> NPUManager::GetClient() { return client_; }
+}  // namespace mindspore::lite
diff --git a/mindspore/lite/src/runtime/agent/npu/npu_manager.h b/mindspore/lite/src/runtime/agent/npu/npu_manager.h
new file
@@ -0,0 +1,68 @@
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_NPU_UTILS_H_
+#define MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_NPU_UTILS_H_
+#include <string>
+#include <memory>
+#include <vector>
+#include "include/HiAiModelManagerService.h"
+
+namespace mindspore::lite {
+
+class NPUManager {
+ public:
+  static NPUManager *GetInstance() {
+    static NPUManager npuManager;
+    return &npuManager;
+  }
+
+  bool IsSupportNPU();
+
+  int InitClient();
+
+  // provide to subgraph to add model.
+  int AddModel(void *model_buf, uint32_t size, const std::string &model_name, int frequency);
+
+  // scheduler to load om model.
+  int LoadOMModel();
+
+  // provide to executor.
+  std::shared_ptr<hiai::AiModelMngerClient> GetClient();
+
+ private:
+  void CheckSupportNPU();
+
+  bool IsKirinChip();
+
+  bool CheckOmBuildIr(const std::string &path);
+
+  std::string GetExecutorPath();
+
+ private:
+  bool is_npu_check_executor = false;
+
+  bool is_support_npu = false;
+
+  std::shared_ptr<hiai::AiModelMngerClient> client_ = nullptr;
+
+  std::vector<std::shared_ptr<hiai::AiModelDescription>> model_desc_;
+
+  std::shared_ptr<hiai::AiModelBuilder> mc_builder_ = nullptr;
+};
+
+}  // namespace mindspore::lite
+#endif  // MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_NPU_UTILS_H_
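Note: NPUManager is a process-wide singleton. The support probe (IsSupportNPU) checks for a Kirin 810/820/985+ chip and verifies a test OM model can be built, caching the result; model registration and loading are split so the scheduler can batch all subgraph models into one Load() call. The runtime drives it roughly like this (sketch only, not part of this commit):

    auto *manager = mindspore::lite::NPUManager::GetInstance();
    if (manager->IsSupportNPU()) {
      manager->InitClient();   // create the HiAI client and model builder
      // ... each NPU subgraph kernel calls AddModel() during Init() ...
      manager->LoadOMModel();  // done once, from LiteSession::CompileGraph()
    }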
diff --git a/mindspore/lite/src/runtime/agent/npu/subgraph_npu_kernel.cc b/mindspore/lite/src/runtime/agent/npu/subgraph_npu_kernel.cc
new file
@@ -0,0 +1,187 @@
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "src/runtime/agent/npu/subgraph_npu_kernel.h"
+#include "include/errorcode.h"
+#include "src/runtime/agent/npu/npu_executor.h"
+#include "include/graph/operator.h"
+#include "include/graph/graph.h"
+#include "src/tensor.h"
+#include "include/graph/model.h"
+#include "include/hiai_ir_build.h"
+#include "include/HiAiModelManagerService.h"
+#include "include/HiAiModelManagerType.h"
+#include "include/context.h"
+#include "include/version.h"
+#include "include/graph/op/array_defs.h"
+#include "src/common/file_utils.h"
+#include "src/common/common.h"
+#include "src/common/utils.h"
+#include "src/runtime/agent/npu/npu_converter_utils.h"
+#include "mindspore/lite/src/runtime/kernel/npu/npu_kernel.h"
+#include "src/runtime/agent/npu/npu_manager.h"
+namespace mindspore::kernel {
+using mindspore::lite::RET_ERROR;
+using mindspore::lite::RET_OK;
+
+domi::ModelBufferData *SubGraphNpuKernel::BuildIRModel() {
+  ge::Graph graph("NPUGraph");
+
+  auto ret = BuildNPUInputOp();
+  if (ret != RET_OK) {
+    MS_LOG(ERROR) << "Build NPU input operator failed.";
+    return nullptr;
+  }
+  ret = BuildNPUOutputOp();
+  if (ret != RET_OK) {
+    MS_LOG(ERROR) << "Build NPU output operator failed.";
+    return nullptr;
+  }
+  graph.SetInputs(subgraph_input_op_).SetOutputs(subgraph_output_op_);
+  ge::Model model(GetOMModelName(), mindspore::lite::Version());
+  model.SetGraph(graph);
+  domi::HiaiIrBuild ir_build;
+  auto om_model_buff = new (std::nothrow) domi::ModelBufferData;
+  if (om_model_buff == nullptr) {
+    MS_LOG(ERROR) << "om model buffer is nullptr.";
+    return nullptr;
+  }
+  if (!ir_build.CreateModelBuff(model, *om_model_buff)) {
+    MS_LOG(ERROR) << "Create model buffer failed.";
+    delete om_model_buff;
+    return nullptr;
+  }
+  if (!ir_build.BuildIRModel(model, *om_model_buff)) {
+    MS_LOG(ERROR) << "Build IR model failed.";
+    ir_build.ReleaseModelBuff(*om_model_buff);
+    delete om_model_buff;
+    return nullptr;
+  }
+  return om_model_buff;
+}
+
+int SubGraphNpuKernel::Run() { return this->executor_->Run(in_tensors_, out_tensors_, nodes_, nullptr); }
+
+int SubGraphNpuKernel::BuildNPUInputOp() {
+  int count = 0;
+  subgraph_input_op_.clear();
+  for (auto node : this->nodes_) {
+    std::vector<ge::Operator *> node_input_op;
+    for (auto in_tensor : node->in_tensors()) {
+      if (IsSubGraphInputTensor(in_tensor)) {
+        auto tensor_name = node->name() + "_" + std::to_string(count++);
+        auto data = mindspore::lite::ConverterToNPUData(in_tensor, tensor_name);
+        subgraph_input_op_.push_back(*data);
+        node_input_op.push_back(data);
+        continue;
+      }
+
+      bool is_weight_tensor = true;
+      for (auto in_kernel : node->in_kernels()) {
+        if (IsContain(in_kernel->out_tensors(), in_tensor)) {
+          if (in_kernel->desc().arch == mindspore::kernel::kNPU) {
+            // input come from npu
+            auto npu_op = reinterpret_cast<NPUKernel *>(in_kernel)->GetNPUOp();
+            if (npu_op != nullptr) {
+              npu_op->GetOutputDesc(0).GetName();
+              node_input_op.push_back(npu_op);
+              is_weight_tensor = false;
+              break;
+            } else {
+              MS_LOG(ERROR) << in_kernel->type_str() << "NPU Operator is nullptr.";
+              return RET_ERROR;
+            }
+          } else {
+            MS_LOG(ERROR) << "The input of the intermediate node comes from the CPU";
+            return RET_ERROR;
+          }
+        }
+      }
+
+      // weight tensor
+      if (is_weight_tensor) {
+        if (!(node->Type() == schema::PrimitiveType_Conv2D || node->Type() == schema::PrimitiveType_DeConv2D ||
+              node->Type() == schema::PrimitiveType_DepthwiseConv2D ||
+              node->Type() == schema::PrimitiveType_DeDepthwiseConv2D)) {
+          auto name = node->name() + "_" + std::to_string(count++);
+          auto weight_const = new (std::nothrow) hiai::op::Const(node->name() + "_" + std::to_string(count++));
+          if (weight_const == nullptr) {
+            MS_LOG(ERROR) << "new weight const failed.";
+            return RET_ERROR;
+          }
+          auto weight_tensor = mindspore::lite::ConverterToNPUTensor(in_tensor);
+          weight_const->set_attr_value(weight_tensor);
+          node_input_op.push_back(weight_const);
+        }
+      }
+    }
+    // set input to NPU
+    reinterpret_cast<NPUKernel *>(node)->SetNPUInputs(node->in_tensors(), node->out_tensors(), node_input_op);
+  }
+  return RET_OK;
+}
+
+bool SubGraphNpuKernel::IsSubGraphInputTensor(lite::Tensor *inputs) { return IsContain(this->in_tensors(), inputs); }
+
+std::vector<ge::Operator> SubGraphNpuKernel::GetNPUNodes(const vector<kernel::LiteKernel *> &nodes) {
+  std::vector<ge::Operator> ops;
+  ops.reserve(nodes.size());
+  for (int i = 0; i < nodes.size(); i++) {
+    ops.push_back(*reinterpret_cast<NPUKernel *>(nodes[i])->GetNPUOp());
+  }
+  return ops;
+}
+
+int SubGraphNpuKernel::BuildNPUOutputOp() {
+  subgraph_output_op_ = GetNPUNodes(out_nodes_);
+  if (subgraph_output_op_.empty()) {
+    MS_LOG(ERROR) << "NPU subgraph output op is empty.";
+    return RET_ERROR;
+  }
+  return RET_OK;
+}
+
+void SubGraphNpuKernel::SetIndex(int index) { this->index_ = index; }
+
+std::string SubGraphNpuKernel::GetOMModelName() { return this->name_ + std::to_string(index_) + ".om"; }
+int SubGraphNpuKernel::Init() {
+  model_buffer_data_ = BuildIRModel();
+  if (model_buffer_data_ == nullptr) {
+    MS_LOG(ERROR) << "Build IR model failed.";
+    return RET_ERROR;
+  }
+
+  mindspore::lite::NPUManager::GetInstance()->AddModel(model_buffer_data_->data, model_buffer_data_->length,
+                                                       GetOMModelName(), context_->GetNpuInfo().frequency_);
+
+  executor_ = new (std::nothrow) mindspore::lite::NPUExecutor(GetOMModelName());
+
+  if (executor_ == nullptr) {
+    MS_LOG(ERROR) << "Create NPUExecutor failed.";
+    return RET_ERROR;
+  }
+  return RET_OK;
+}
|
int SubGraphNpuKernel::Prepare() {
|
||||||
|
if (executor_->Prepare(nodes_) != RET_OK) {
|
||||||
|
MS_LOG(ERROR) << "NPU executor prepare failed.";
|
||||||
|
return RET_ERROR;
|
||||||
|
}
|
||||||
|
return RET_OK;
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace mindspore::kernel
|
|
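Taken together, these methods give the NPU subgraph a fixed lifecycle: build the HiAI IR model, hand the compiled OM buffer to NPUManager, then load and run it through NPUExecutor. The following is a minimal driver sketch of that lifecycle, not code from this commit; the tensor and kernel vectors are placeholders for whatever the scheduler has partitioned, and `ctx` stands for the session's InnerContext:

// Driver sketch, assuming inputs/outputs/in_kernels/out_kernels/nodes were
// already partitioned by the scheduler (placeholders here).
auto *sub_kernel = new (std::nothrow)
  kernel::SubGraphNpuKernel(inputs, outputs, in_kernels, out_kernels, nodes, ctx);
if (sub_kernel == nullptr) {
  return RET_ERROR;
}
sub_kernel->SetIndex(0);               // the index makes the .om model name unique
if (sub_kernel->Init() != RET_OK) {    // builds the IR model, registers it with NPUManager
  delete sub_kernel;
  return RET_ERROR;
}
if (sub_kernel->Prepare() != RET_OK) { // loads the model into the NPU executor
  delete sub_kernel;
  return RET_ERROR;
}
int ret = sub_kernel->Run();           // executes the whole subgraph on the NPU
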
@@ -0,0 +1,84 @@
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef MINDSPORE_LITE_SRC_RUNTIME_AGENT_SUBGRAPH_NPU_KERNEL_H_
#define MINDSPORE_LITE_SRC_RUNTIME_AGENT_SUBGRAPH_NPU_KERNEL_H_
#include <vector>
#include <string>
#include "include/hiai_ir_build.h"
#include "src/sub_graph_kernel.h"
#include "src/runtime/agent/npu/npu_executor.h"
#include "include/graph/op/all_ops.h"

namespace mindspore::kernel {
using mindspore::lite::RET_ERROR;
using mindspore::lite::RET_OK;
class SubGraphNpuKernel : public SubGraphKernel {
 public:
  SubGraphNpuKernel(const std::vector<lite::Tensor *> &inputs, const std::vector<lite::Tensor *> &outputs,
                    const std::vector<kernel::LiteKernel *> &inKernels,
                    const std::vector<kernel::LiteKernel *> &outKernels, const std::vector<kernel::LiteKernel *> &nodes,
                    const lite::InnerContext *ctx = nullptr)
      : SubGraphKernel(inputs, outputs, inKernels, outKernels, nodes, ctx) {
    subgraph_type_ = kNpuSubGraph;
    this->name_ = "NpuSubGraph";
  }

  ~SubGraphNpuKernel() override = default;

  int Init() override;

  int Prepare() override;

  int PreProcess() override { return RET_OK; }

  int Run() override;

  int Run(const KernelCallBack &before, const KernelCallBack &after) override { return this->Run(); }

  int PostProcess() override { return RET_OK; }

  int ReSize() override {
    MS_LOG(ERROR) << "NPU does not support the resize function temporarily.";
    return RET_ERROR;
  }

  void SetIndex(int index);

 private:
  domi::ModelBufferData *BuildIRModel();

  int BuildNPUInputOp();

  int BuildNPUOutputOp();

  std::vector<ge::Operator> GetNPUNodes(const std::vector<kernel::LiteKernel *> &nodes);

  bool IsSubGraphInputTensor(lite::Tensor *inputs);

  std::string GetOMModelName();

 private:
  int index_ = 0;

  domi::ModelBufferData *model_buffer_data_ = nullptr;

  std::vector<ge::Operator> subgraph_input_op_;

  std::vector<ge::Operator> subgraph_output_op_;
};
}  // namespace mindspore::kernel
#endif  // MINDSPORE_LITE_SRC_RUNTIME_AGENT_SUBGRAPH_NPU_KERNEL_H_

@@ -0,0 +1,50 @@
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "src/runtime/kernel/npu/add_npu.h"
#include "include/graph/op/all_ops.h"
#include "src/kernel_registry.h"

using mindspore::kernel::KERNEL_ARCH::kNPU;
using mindspore::lite::KernelRegistrar;
using mindspore::schema::PrimitiveType_Add;

namespace mindspore::kernel {
int AddNPUKernel::IsSupport(const std::vector<lite::Tensor *> &inputs, const std::vector<lite::Tensor *> &outputs,
                            OpParameter *opParameter) {
  if (inputs[0]->shape() != inputs[1]->shape()) {
    MS_LOG(INFO) << "DDK 500 does not support broadcast."
                 << " shape 1 is:" << inputs[0]->shape() << " shape 2 is:" << inputs[1]->shape();
    return RET_ERROR;
  }
  return RET_OK;
}
void AddNPUKernel::SetNPUInputs(const std::vector<lite::Tensor *> &inputs, const std::vector<lite::Tensor *> &outputs,
                                const std::vector<ge::Operator *> &npu_inputs) {
  op_ = new (std::nothrow) hiai::op::Add(name_);
  if (op_ == nullptr) {
    MS_LOG(ERROR) << "New Add operator failed.";
    return;
  }
  op_->set_input_x1(*npu_inputs[0]);
  op_->set_input_x2(*npu_inputs[1]);
}
ge::Operator *AddNPUKernel::GetNPUOp() { return this->op_; }
AddNPUKernel::~AddNPUKernel() {
  if (op_ != nullptr) {
    delete op_;
    op_ = nullptr;
  }
}

REG_KERNEL(kNPU, kNumberTypeFloat32, PrimitiveType_Add, NPUKernelCreator<AddNPUKernel>)
}  // namespace mindspore::kernel

@@ -0,0 +1,41 @@
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_KERNEL_NPU_ADD_H_
#define MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_KERNEL_NPU_ADD_H_
#include <vector>
#include "src/runtime/kernel/npu/npu_kernel.h"
#include "include/graph/op/math_defs.h"
namespace mindspore::kernel {
class AddNPUKernel : public NPUKernel {
 public:
  AddNPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
               const std::vector<lite::Tensor *> &outputs)
      : NPUKernel(parameter, inputs, outputs) {}
  ~AddNPUKernel() override;

  int IsSupport(const std::vector<lite::Tensor *> &inputs, const std::vector<lite::Tensor *> &outputs,
                OpParameter *opParameter) override;
  void SetNPUInputs(const std::vector<lite::Tensor *> &inputs, const std::vector<lite::Tensor *> &outputs,
                    const std::vector<ge::Operator *> &npu_inputs) override;

  ge::Operator *GetNPUOp() override;

 private:
  hiai::op::Add *op_ = nullptr;
};
}  // namespace mindspore::kernel
#endif  // MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_KERNEL_NPU_ADD_H_

@@ -0,0 +1,49 @@
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "src/runtime/kernel/npu/concat_npu.h"
#include "src/kernel_registry.h"

using mindspore::kernel::KERNEL_ARCH::kNPU;
using mindspore::lite::KernelRegistrar;
using mindspore::schema::PrimitiveType_Concat;

namespace mindspore::kernel {
int ConcatNPUKernel::IsSupport(const std::vector<lite::Tensor *> &inputs, const std::vector<lite::Tensor *> &outputs,
                               OpParameter *opParameter) {
  return RET_OK;
}
void ConcatNPUKernel::SetNPUInputs(const std::vector<lite::Tensor *> &inputs,
                                   const std::vector<lite::Tensor *> &outputs,
                                   const std::vector<ge::Operator *> &npu_inputs) {
  op_ = new (std::nothrow) hiai::op::ConcatD(name_);
  if (op_ == nullptr) {
    MS_LOG(ERROR) << "New ConcatD operator failed.";
    return;
  }
  op_->set_attr_concat_dim(concat_parameter_->axis_);
  op_->set_attr_N(npu_inputs.size());
  op_->create_dynamic_input_x(npu_inputs.size());
  for (size_t i = 0; i < npu_inputs.size(); ++i) {
    // HiAI dynamic inputs are 1-indexed, hence i + 1.
    op_->set_dynamic_input_x(i + 1, *npu_inputs[i]);
  }
}
ge::Operator *ConcatNPUKernel::GetNPUOp() { return this->op_; }
ConcatNPUKernel::~ConcatNPUKernel() {
  if (op_ != nullptr) {
    delete op_;
    op_ = nullptr;
  }
}

REG_KERNEL(kNPU, kNumberTypeFloat32, PrimitiveType_Concat, NPUKernelCreator<ConcatNPUKernel>)
}  // namespace mindspore::kernel

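Concat is the one kernel in this commit with a variable number of inputs, so it uses HiAI's dynamic-input API instead of the fixed set_input_x1/set_input_x2 setters. A short sketch of that pattern in isolation, using only the calls the kernel above already makes; `WireConcat` is an illustrative helper name, not part of this commit:

// Sketch: wiring a variable-length operator list into ConcatD, as SetNPUInputs does above.
void WireConcat(hiai::op::ConcatD *op, const std::vector<ge::Operator *> &npu_inputs, int axis) {
  op->set_attr_concat_dim(axis);                  // axis to concatenate along
  op->set_attr_N(npu_inputs.size());              // declare the number of inputs
  op->create_dynamic_input_x(npu_inputs.size());  // allocate the dynamic input slots
  for (size_t i = 0; i < npu_inputs.size(); ++i) {
    op->set_dynamic_input_x(i + 1, *npu_inputs[i]);  // slots are 1-indexed
  }
}
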
@@ -0,0 +1,44 @@
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_KERNEL_NPU_CONCAT_H_
#define MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_KERNEL_NPU_CONCAT_H_
#include <vector>
#include "nnacl/concat_parameter.h"
#include "src/runtime/kernel/npu/npu_kernel.h"
#include "include/graph/op/all_ops.h"
namespace mindspore::kernel {
class ConcatNPUKernel : public NPUKernel {
 public:
  ConcatNPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
                  const std::vector<lite::Tensor *> &outputs)
      : NPUKernel(parameter, inputs, outputs) {
    concat_parameter_ = reinterpret_cast<ConcatParameter *>(parameter);
  }
  ~ConcatNPUKernel() override;

  int IsSupport(const std::vector<lite::Tensor *> &inputs, const std::vector<lite::Tensor *> &outputs,
                OpParameter *opParameter) override;
  void SetNPUInputs(const std::vector<lite::Tensor *> &inputs, const std::vector<lite::Tensor *> &outputs,
                    const std::vector<ge::Operator *> &npu_inputs) override;
  ge::Operator *GetNPUOp() override;

 private:
  hiai::op::ConcatD *op_ = nullptr;
  ConcatParameter *concat_parameter_;
};
}  // namespace mindspore::kernel
#endif  // MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_KERNEL_NPU_CONCAT_H_

@@ -0,0 +1,51 @@
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "src/runtime/kernel/npu/div_npu.h"
#include "include/graph/op/all_ops.h"
#include "src/kernel_registry.h"

using mindspore::kernel::KERNEL_ARCH::kNPU;
using mindspore::lite::KernelRegistrar;
using mindspore::schema::PrimitiveType_Div;

namespace mindspore::kernel {
int DivNPUKernel::IsSupport(const std::vector<lite::Tensor *> &inputs, const std::vector<lite::Tensor *> &outputs,
                            OpParameter *opParameter) {
  if (inputs[0]->shape() != inputs[1]->shape()) {
    MS_LOG(INFO) << "DDK 500 does not support broadcast."
                 << " shape 1 is:" << inputs[0]->shape() << " shape 2 is:" << inputs[1]->shape();
    return RET_ERROR;
  }
  return RET_OK;
}
void DivNPUKernel::SetNPUInputs(const std::vector<lite::Tensor *> &inputs, const std::vector<lite::Tensor *> &outputs,
                                const std::vector<ge::Operator *> &npu_inputs) {
  // Create the operator here, as the other arithmetic kernels do, rather than in
  // IsSupport(), so a rejected kernel does not leak a half-built operator.
  op_ = new (std::nothrow) hiai::op::RealDiv(name_);
  if (op_ == nullptr) {
    MS_LOG(ERROR) << "New RealDiv operator failed.";
    return;
  }
  op_->set_input_x1(*npu_inputs[0]);
  op_->set_input_x2(*npu_inputs[1]);
}
ge::Operator *DivNPUKernel::GetNPUOp() { return this->op_; }
DivNPUKernel::~DivNPUKernel() {
  if (op_ != nullptr) {
    delete op_;
    op_ = nullptr;
  }
}

REG_KERNEL(kNPU, kNumberTypeFloat32, PrimitiveType_Div, NPUKernelCreator<DivNPUKernel>)
}  // namespace mindspore::kernel

@@ -0,0 +1,40 @@
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_KERNEL_NPU_DIV_H_
#define MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_KERNEL_NPU_DIV_H_
#include <vector>
#include "src/runtime/kernel/npu/npu_kernel.h"
#include "include/graph/op/math_defs.h"
namespace mindspore::kernel {
class DivNPUKernel : public NPUKernel {
 public:
  DivNPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
               const std::vector<lite::Tensor *> &outputs)
      : NPUKernel(parameter, inputs, outputs) {}
  ~DivNPUKernel() override;

  int IsSupport(const std::vector<lite::Tensor *> &inputs, const std::vector<lite::Tensor *> &outputs,
                OpParameter *opParameter) override;
  void SetNPUInputs(const std::vector<lite::Tensor *> &inputs, const std::vector<lite::Tensor *> &outputs,
                    const std::vector<ge::Operator *> &npu_inputs) override;
  ge::Operator *GetNPUOp() override;

 private:
  hiai::op::RealDiv *op_ = nullptr;
};
}  // namespace mindspore::kernel
#endif  // MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_KERNEL_NPU_DIV_H_

@@ -0,0 +1,44 @@
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "src/runtime/kernel/npu/floor_npu.h"
#include "src/kernel_registry.h"

using mindspore::kernel::KERNEL_ARCH::kNPU;
using mindspore::lite::KernelRegistrar;
using mindspore::schema::PrimitiveType_Floor;

namespace mindspore::kernel {
int FloorNPUKernel::IsSupport(const std::vector<lite::Tensor *> &inputs, const std::vector<lite::Tensor *> &outputs,
                              OpParameter *opParameter) {
  return RET_OK;
}
void FloorNPUKernel::SetNPUInputs(const std::vector<lite::Tensor *> &inputs, const std::vector<lite::Tensor *> &outputs,
                                  const std::vector<ge::Operator *> &npu_inputs) {
  op_ = new (std::nothrow) hiai::op::Floor(name_);
  if (op_ == nullptr) {
    MS_LOG(ERROR) << "New Floor operator failed.";
    return;
  }
  op_->set_input_x(*npu_inputs[0]);
}
ge::Operator *FloorNPUKernel::GetNPUOp() { return this->op_; }
FloorNPUKernel::~FloorNPUKernel() {
  if (op_ != nullptr) {
    delete op_;
    op_ = nullptr;
  }
}

REG_KERNEL(kNPU, kNumberTypeFloat32, PrimitiveType_Floor, NPUKernelCreator<FloorNPUKernel>)
}  // namespace mindspore::kernel

@@ -0,0 +1,40 @@
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_KERNEL_NPU_FLOOR_H_
#define MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_KERNEL_NPU_FLOOR_H_
#include <vector>
#include "src/runtime/kernel/npu/npu_kernel.h"
#include "include/graph/op/math_defs.h"
namespace mindspore::kernel {
class FloorNPUKernel : public NPUKernel {
 public:
  FloorNPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
                 const std::vector<lite::Tensor *> &outputs)
      : NPUKernel(parameter, inputs, outputs) {}
  ~FloorNPUKernel() override;

  int IsSupport(const std::vector<lite::Tensor *> &inputs, const std::vector<lite::Tensor *> &outputs,
                OpParameter *opParameter) override;
  void SetNPUInputs(const std::vector<lite::Tensor *> &inputs, const std::vector<lite::Tensor *> &outputs,
                    const std::vector<ge::Operator *> &npu_inputs) override;
  ge::Operator *GetNPUOp() override;

 private:
  hiai::op::Floor *op_ = nullptr;
};
}  // namespace mindspore::kernel
#endif  // MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_KERNEL_NPU_FLOOR_H_

@@ -0,0 +1,51 @@
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "src/runtime/kernel/npu/mul_npu.h"
#include "include/graph/op/all_ops.h"
#include "src/kernel_registry.h"

using mindspore::kernel::KERNEL_ARCH::kNPU;
using mindspore::lite::KernelRegistrar;
using mindspore::schema::PrimitiveType_Mul;

namespace mindspore::kernel {
int MulNPUKernel::IsSupport(const std::vector<lite::Tensor *> &inputs, const std::vector<lite::Tensor *> &outputs,
                            OpParameter *opParameter) {
  if (inputs[0]->shape() != inputs[1]->shape()) {
    MS_LOG(INFO) << "DDK 500 does not support broadcast."
                 << " shape 1 is:" << inputs[0]->shape() << " shape 2 is:" << inputs[1]->shape();
    return RET_ERROR;
  }
  return RET_OK;
}
void MulNPUKernel::SetNPUInputs(const std::vector<lite::Tensor *> &inputs, const std::vector<lite::Tensor *> &outputs,
                                const std::vector<ge::Operator *> &npu_inputs) {
  op_ = new (std::nothrow) hiai::op::Mul(name_);
  if (op_ == nullptr) {
    MS_LOG(ERROR) << "New Mul operator failed.";
    return;
  }
  op_->set_input_x1(*npu_inputs[0]);
  op_->set_input_x2(*npu_inputs[1]);
}
ge::Operator *MulNPUKernel::GetNPUOp() { return this->op_; }
MulNPUKernel::~MulNPUKernel() {
  if (op_ != nullptr) {
    delete op_;
    op_ = nullptr;
  }
}

REG_KERNEL(kNPU, kNumberTypeFloat32, PrimitiveType_Mul, NPUKernelCreator<MulNPUKernel>)
}  // namespace mindspore::kernel

@@ -0,0 +1,40 @@
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_KERNEL_NPU_MUL_H_
#define MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_KERNEL_NPU_MUL_H_
#include <vector>
#include "src/runtime/kernel/npu/npu_kernel.h"
#include "include/graph/op/math_defs.h"
namespace mindspore::kernel {
class MulNPUKernel : public NPUKernel {
 public:
  MulNPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
               const std::vector<lite::Tensor *> &outputs)
      : NPUKernel(parameter, inputs, outputs) {}
  ~MulNPUKernel() override;

  int IsSupport(const std::vector<lite::Tensor *> &inputs, const std::vector<lite::Tensor *> &outputs,
                OpParameter *opParameter) override;
  void SetNPUInputs(const std::vector<lite::Tensor *> &inputs, const std::vector<lite::Tensor *> &outputs,
                    const std::vector<ge::Operator *> &npu_inputs) override;
  ge::Operator *GetNPUOp() override;

 private:
  hiai::op::Mul *op_ = nullptr;
};
}  // namespace mindspore::kernel
#endif  // MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_KERNEL_NPU_MUL_H_

@@ -0,0 +1,69 @@
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_KERNEL_NPU_KERNEL_H_
#define MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_KERNEL_NPU_KERNEL_H_

#include <vector>
#include "src/lite_kernel.h"
#include "include/errorcode.h"
#include "include/graph/graph.h"
#include "src/kernel_registry.h"

using mindspore::kernel::LiteKernel;
using mindspore::lite::RET_ERROR;
using mindspore::lite::RET_OK;
namespace mindspore::kernel {
class NPUKernel : public LiteKernel {
 public:
  NPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
            const std::vector<lite::Tensor *> &outputs)
      : LiteKernel(parameter, inputs, outputs, nullptr, nullptr) {}
  ~NPUKernel() override = default;

  // NPU kernels never run individually; the whole subgraph is executed by NPUExecutor.
  int Run() override { return RET_ERROR; }

  virtual int IsSupport(const std::vector<lite::Tensor *> &inputs, const std::vector<lite::Tensor *> &outputs,
                        OpParameter *opParameter) {
    return RET_OK;
  }

  virtual ge::Operator *GetNPUOp() = 0;

  virtual void SetNPUInputs(const std::vector<mindspore::lite::Tensor *> &inputs,
                            const std::vector<lite::Tensor *> &outputs,
                            const std::vector<ge::Operator *> &npu_inputs) = 0;
};
template <class T>
kernel::LiteKernel *NPUKernelCreator(const std::vector<lite::Tensor *> &inputs,
                                     const std::vector<lite::Tensor *> &outputs, OpParameter *opParameter,
                                     const lite::InnerContext *ctx, const kernel::KernelKey &desc,
                                     const mindspore::lite::PrimitiveC *primitive) {
  auto *kernel = new (std::nothrow) T(opParameter, inputs, outputs);
  if (kernel == nullptr) {
    MS_LOG(ERROR) << "kernel " << opParameter->name_ << " is nullptr.";
    free(opParameter);
    return nullptr;
  }

  auto ret = kernel->IsSupport(inputs, outputs, opParameter);
  if (ret != RET_OK) {
    // Free the rejected kernel instead of leaking it.
    delete kernel;
    return nullptr;
  }
  return kernel;
}
}  // namespace mindspore::kernel
#endif  // MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_KERNEL_NPU_KERNEL_H_

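This creator template and the REG_KERNEL macro are the whole extension point: every op file in this commit is the same four steps (subclass NPUKernel, gate unsupported cases in IsSupport, build the HiAI operator in SetNPUInputs, register the creator). The following sketch of a hypothetical subtraction kernel illustrates the mold; hiai::op::Sub and PrimitiveType_Sub are assumptions made for illustration, not operators this commit uses:

// Hypothetical SubNPUKernel, following the Add/Mul/Div pattern above.
// Assumes the DDK provides hiai::op::Sub with set_input_x1/set_input_x2.
class SubNPUKernel : public NPUKernel {
 public:
  SubNPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
               const std::vector<lite::Tensor *> &outputs)
      : NPUKernel(parameter, inputs, outputs) {}
  ~SubNPUKernel() override { delete op_; }

  int IsSupport(const std::vector<lite::Tensor *> &inputs, const std::vector<lite::Tensor *> &outputs,
                OpParameter *opParameter) override {
    // Same DDK 500 restriction the other arithmetic kernels enforce: no broadcast.
    return inputs[0]->shape() == inputs[1]->shape() ? RET_OK : RET_ERROR;
  }
  void SetNPUInputs(const std::vector<lite::Tensor *> &inputs, const std::vector<lite::Tensor *> &outputs,
                    const std::vector<ge::Operator *> &npu_inputs) override {
    op_ = new (std::nothrow) hiai::op::Sub(name_);
    if (op_ == nullptr) {
      return;
    }
    op_->set_input_x1(*npu_inputs[0]);
    op_->set_input_x2(*npu_inputs[1]);
  }
  ge::Operator *GetNPUOp() override { return op_; }

 private:
  hiai::op::Sub *op_ = nullptr;
};
// Registration is what lets the scheduler's NPU-first lookup (below) find the kernel:
// REG_KERNEL(kNPU, kNumberTypeFloat32, PrimitiveType_Sub, NPUKernelCreator<SubNPUKernel>)
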
@@ -0,0 +1,47 @@
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "src/runtime/kernel/npu/reshape_npu.h"
#include "src/kernel_registry.h"
#include "include/graph/op/all_ops.h"
#include "src/runtime/agent/npu/npu_converter_utils.h"
using mindspore::kernel::KERNEL_ARCH::kNPU;
using mindspore::lite::KernelRegistrar;
using mindspore::schema::PrimitiveType_Reshape;

namespace mindspore::kernel {
int ReshapeNPUKernel::IsSupport(const std::vector<lite::Tensor *> &inputs, const std::vector<lite::Tensor *> &outputs,
                                OpParameter *opParameter) {
  return RET_OK;
}
void ReshapeNPUKernel::SetNPUInputs(const std::vector<lite::Tensor *> &inputs,
                                    const std::vector<lite::Tensor *> &outputs,
                                    const std::vector<ge::Operator *> &npu_inputs) {
  op_ = new (std::nothrow) hiai::op::Reshape(name_);
  if (op_ == nullptr) {
    MS_LOG(ERROR) << "New Reshape operator failed.";
    return;
  }
  op_->set_input_x(*npu_inputs[0]);
  op_->set_input_shape(*npu_inputs[1]);
}
ge::Operator *ReshapeNPUKernel::GetNPUOp() { return this->op_; }
ReshapeNPUKernel::~ReshapeNPUKernel() {
  if (op_ != nullptr) {
    delete op_;
    op_ = nullptr;
  }
}

REG_KERNEL(kNPU, kNumberTypeFloat32, PrimitiveType_Reshape, NPUKernelCreator<ReshapeNPUKernel>)
}  // namespace mindspore::kernel

@@ -0,0 +1,41 @@
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_KERNEL_NPU_RESHAPE_H_
#define MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_KERNEL_NPU_RESHAPE_H_
#include <vector>
#include "nnacl/conv_parameter.h"
#include "src/runtime/kernel/npu/npu_kernel.h"
#include "include/graph/op/all_ops.h"
namespace mindspore::kernel {
class ReshapeNPUKernel : public NPUKernel {
 public:
  ReshapeNPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
                   const std::vector<lite::Tensor *> &outputs)
      : NPUKernel(parameter, inputs, outputs) {}
  ~ReshapeNPUKernel() override;

  int IsSupport(const std::vector<lite::Tensor *> &inputs, const std::vector<lite::Tensor *> &outputs,
                OpParameter *opParameter) override;
  void SetNPUInputs(const std::vector<lite::Tensor *> &inputs, const std::vector<lite::Tensor *> &outputs,
                    const std::vector<ge::Operator *> &npu_inputs) override;
  ge::Operator *GetNPUOp() override;

 private:
  hiai::op::Reshape *op_ = nullptr;
};
}  // namespace mindspore::kernel
#endif  // MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_KERNEL_NPU_RESHAPE_H_

@@ -0,0 +1,46 @@
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "src/runtime/kernel/npu/scale_npu.h"
#include "src/kernel_registry.h"

using mindspore::kernel::KERNEL_ARCH::kNPU;
using mindspore::lite::KernelRegistrar;
using mindspore::schema::PrimitiveType_Scale;

namespace mindspore::kernel {
int ScaleNPUKernel::IsSupport(const std::vector<lite::Tensor *> &inputs, const std::vector<lite::Tensor *> &outputs,
                              OpParameter *opParameter) {
  // SetNPUInputs() below unconditionally wires scale and bias, so require all three inputs.
  if (inputs.size() != 3) {
    MS_LOG(INFO) << "NPU scale kernel expects input, scale and bias tensors.";
    return RET_ERROR;
  }
  return RET_OK;
}
void ScaleNPUKernel::SetNPUInputs(const std::vector<lite::Tensor *> &inputs, const std::vector<lite::Tensor *> &outputs,
                                  const std::vector<ge::Operator *> &npu_inputs) {
  op_ = new (std::nothrow) hiai::op::Scale(name_);
  if (op_ == nullptr) {
    MS_LOG(ERROR) << "New Scale operator failed.";
    return;
  }
  op_->set_attr_axis(this->axis_);
  op_->set_input_x(*npu_inputs[0]);
  op_->set_input_scale(*npu_inputs[1]);
  op_->set_input_bias(*npu_inputs[2]);
}
ge::Operator *ScaleNPUKernel::GetNPUOp() { return this->op_; }
ScaleNPUKernel::~ScaleNPUKernel() {
  if (op_ != nullptr) {
    delete op_;
    op_ = nullptr;
  }
}

REG_KERNEL(kNPU, kNumberTypeFloat32, PrimitiveType_Scale, NPUKernelCreator<ScaleNPUKernel>)
}  // namespace mindspore::kernel

@@ -0,0 +1,44 @@
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_KERNEL_NPU_SCALE_H_
#define MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_KERNEL_NPU_SCALE_H_
#include <vector>
#include "nnacl/scale.h"
#include "src/runtime/kernel/npu/npu_kernel.h"
#include "include/graph/op/nn_defs.h"
namespace mindspore::kernel {
class ScaleNPUKernel : public NPUKernel {
 public:
  ScaleNPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
                 const std::vector<lite::Tensor *> &outputs)
      : NPUKernel(parameter, inputs, outputs) {
    axis_ = reinterpret_cast<ScaleParameter *>(parameter)->axis_;
  }
  ~ScaleNPUKernel() override;

  int IsSupport(const std::vector<lite::Tensor *> &inputs, const std::vector<lite::Tensor *> &outputs,
                OpParameter *opParameter) override;
  void SetNPUInputs(const std::vector<lite::Tensor *> &inputs, const std::vector<lite::Tensor *> &outputs,
                    const std::vector<ge::Operator *> &npu_inputs) override;
  ge::Operator *GetNPUOp() override;

 private:
  hiai::op::Scale *op_ = nullptr;
  int axis_;
};
}  // namespace mindspore::kernel
#endif  // MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_KERNEL_NPU_SCALE_H_

@@ -0,0 +1,50 @@
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "src/runtime/kernel/npu/softmax_npu.h"
#include "src/kernel_registry.h"

using mindspore::kernel::KERNEL_ARCH::kNPU;
using mindspore::lite::KernelRegistrar;
using mindspore::schema::PrimitiveType_SoftMax;

namespace mindspore::kernel {
int SoftmaxNPUKernel::IsSupport(const std::vector<lite::Tensor *> &inputs, const std::vector<lite::Tensor *> &outputs,
                                OpParameter *opParameter) {
  return RET_OK;
}
void SoftmaxNPUKernel::SetNPUInputs(const std::vector<lite::Tensor *> &inputs,
                                    const std::vector<lite::Tensor *> &outputs,
                                    const std::vector<ge::Operator *> &npu_inputs) {
  op_ = new (std::nothrow) hiai::op::Softmax(name_);
  if (op_ == nullptr) {
    MS_LOG(ERROR) << "New Softmax operator failed.";
    return;
  }
  // axis_ == -1 means the last axis; resolve it against the input rank.
  if (axis_ == -1) {
    op_->set_attr_axis(inputs[0]->shape().size() - 1);
  } else {
    op_->set_attr_axis(axis_);
  }
  op_->set_input_x(*npu_inputs[0]);
}
ge::Operator *SoftmaxNPUKernel::GetNPUOp() { return this->op_; }
SoftmaxNPUKernel::~SoftmaxNPUKernel() {
  if (op_ != nullptr) {
    delete op_;
    op_ = nullptr;
  }
}

REG_KERNEL(kNPU, kNumberTypeFloat32, PrimitiveType_SoftMax, NPUKernelCreator<SoftmaxNPUKernel>)
}  // namespace mindspore::kernel

@@ -0,0 +1,45 @@
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_KERNEL_NPU_SOFTMAX_H_
#define MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_KERNEL_NPU_SOFTMAX_H_
#include <vector>
#include "src/runtime/kernel/npu/npu_kernel.h"
#include "nnacl/softmax_parameter.h"
#include "include/graph/op/nn_defs.h"
namespace mindspore::kernel {
class SoftmaxNPUKernel : public NPUKernel {
 public:
  SoftmaxNPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
                   const std::vector<lite::Tensor *> &outputs)
      : NPUKernel(parameter, inputs, outputs) {
    auto softmax_parameter = reinterpret_cast<SoftmaxParameter *>(parameter);
    axis_ = softmax_parameter->axis_;
  }
  ~SoftmaxNPUKernel() override;

  int IsSupport(const std::vector<lite::Tensor *> &inputs, const std::vector<lite::Tensor *> &outputs,
                OpParameter *opParameter) override;
  void SetNPUInputs(const std::vector<lite::Tensor *> &inputs, const std::vector<lite::Tensor *> &outputs,
                    const std::vector<ge::Operator *> &npu_inputs) override;
  ge::Operator *GetNPUOp() override;

 private:
  hiai::op::Softmax *op_ = nullptr;
  int axis_;
};
}  // namespace mindspore::kernel
#endif  // MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_KERNEL_NPU_SOFTMAX_H_

@@ -28,10 +28,14 @@
 #include "src/runtime/kernel/opencl/opencl_subgraph.h"
 #include "src/runtime/opencl/opencl_runtime.h"
 #endif
+#if SUPPORT_NPU
+#include "src/runtime/agent/npu/subgraph_npu_kernel.h"
+#include "src/runtime/agent/npu/npu_manager.h"
+#endif
 namespace mindspore::lite {
 using kernel::KERNEL_ARCH::kCPU;
 using kernel::KERNEL_ARCH::kGPU;
+using kernel::KERNEL_ARCH::kNPU;

 int Scheduler::Schedule(const lite::Model *model, std::vector<Tensor *> *tensors,
                         std::vector<kernel::LiteKernel *> *kernels) {
@@ -227,13 +231,13 @@ int Scheduler::ConstructSubGraphs(std::vector<kernel::LiteKernel *> *kernels) {
       break;
     }
     auto head_kernel = *head_kernel_iter;
-    if (head_kernel->desc().arch == mindspore::kernel::kNPU || head_kernel->desc().arch == mindspore::kernel::kAPU) {
-      MS_LOG(ERROR) << "Not support NPU and APU now";
+    if (head_kernel->desc().arch == mindspore::kernel::kAPU) {
+      MS_LOG(ERROR) << "Not support APU now";
       return RET_NOT_SUPPORT;
     }
     auto cur_sub_graph_type = mindspore::lite::Scheduler::GetKernelSubGraphType(head_kernel);
     auto sub_kernels = FindAllSubGraphKernels(head_kernel, &is_kernel_sinked);
-    auto subgraph = CreateSubGraphKernel(sub_kernels, cur_sub_graph_type);
+    auto subgraph = CreateSubGraphKernel(sub_kernels, cur_sub_graph_type, kernels->size());
     if (subgraph == nullptr) {
       MS_LOG(ERROR) << "Create SubGraphKernel failed";
       return RET_ERROR;
@@ -244,8 +248,8 @@ int Scheduler::ConstructSubGraphs(std::vector<kernel::LiteKernel *> *kernels) {
 }

 kernel::SubGraphKernel *Scheduler::CreateSubGraphKernel(const std::vector<kernel::LiteKernel *> &kernels,
-                                                        kernel::SubGraphType type) {
-  if (type == kernel::kApuSubGraph || type == kernel::kNpuSubGraph) {
+                                                        kernel::SubGraphType type, int index) {
+  if (type == kernel::kApuSubGraph) {
     return nullptr;
   }
   std::vector<Tensor *> input_tensors = kernel::LiteKernelUtil::SubgraphInputTensors(kernels);
@@ -259,6 +263,17 @@ kernel::SubGraphKernel *Scheduler::CreateSubGraphKernel(const std::vector<kernel
     return sub_kernel;
 #else
     return nullptr;
+#endif
+  }
+  if (type == kernel::kNpuSubGraph) {
+#if SUPPORT_NPU
+    auto sub_kernel =
+      new kernel::SubGraphNpuKernel(input_tensors, output_tensors, input_kernels, output_kernels, kernels, context_);
+    sub_kernel->SetIndex(index);
+    sub_kernel->Init();
+    return sub_kernel;
+#else
+    return nullptr;
 #endif
   }
   if (type == kernel::kCpuFP16SubGraph) {
@@ -280,6 +295,19 @@ kernel::LiteKernel *Scheduler::ScheduleNode(const std::vector<Tensor *> &in_tens
   MS_ASSERT(primitive != nullptr);
   TypeId data_type = GetFirstFp32Fp16OrInt8Type(in_tensors);
   kernel::KernelKey desc{kCPU, data_type, static_cast<schema::PrimitiveType>(primitive->Type())};
+#if SUPPORT_NPU
+  if (context_->IsNpuEnabled()) {
+    kernel::KernelKey npu_desc{kNPU, desc.data_type, desc.type};
+    auto *kernel = KernelRegistry::GetInstance()->GetKernel(in_tensors, out_tensors, primitive, context_, npu_desc);
+    if (kernel != nullptr) {
+      MS_LOG(DEBUG) << "Get npu op success: " << schema::EnumNamePrimitiveType(npu_desc.type) << " " << node->name_;
+      return kernel;
+    } else {
+      MS_LOG(DEBUG) << "Get npu op failed, schedule to cpu: " << schema::EnumNamePrimitiveType(npu_desc.type) << " "
+                    << node->name_;
+    }
+  }
+#endif
 #if SUPPORT_GPU
   if (context_->IsGpuEnabled()) {
     kernel::KernelKey gpu_desc{kGPU, desc.data_type, desc.type};
@@ -46,7 +46,7 @@ class Scheduler {
   int ConstructSubGraphs(std::vector<kernel::LiteKernel *> *kernels);

   kernel::SubGraphKernel *CreateSubGraphKernel(const std::vector<kernel::LiteKernel *> &kernels,
-                                               kernel::SubGraphType type);
+                                               kernel::SubGraphType type, int index);

   std::vector<kernel::LiteKernel *> FindAllSubGraphKernels(
     kernel::LiteKernel *head_kernel, std::map<const kernel::LiteKernel *, bool> *sinked_kernel_map);
@@ -41,7 +41,6 @@ file(GLOB KERNEL_OP_TRAIN_SRC
 if (SUPPORT_TRAIN)
     list(APPEND KERNEL_OP_SRC ${KERNEL_OP_TRAIN_SRC})
 endif()
-
 if (PLATFORM_ARM64)
     # assembly
     file(GLOB TEST_ASSEMBLY_SRC ${LITE_DIR}/nnacl/assembly/arm64/*.s
@@ -304,6 +303,10 @@ endif()
 #minddata-lite
 # )
 #endif()
+if (SUPPORT_NPU)
+    include_directories(${DDK_PATH})
+    target_link_libraries(lite-test npu_kernel_mid)
+endif ()
 if (ENABLE_CONVERTER)
     target_link_libraries(lite-test
             anf_importer_mid
@@ -497,6 +497,12 @@ int Benchmark::RunBenchmark() {
     context->device_list_.push_back(gpu_device_ctx);
   }

+  if (flags_->device_ == "NPU") {
+    DeviceContext npu_device_ctx{DT_NPU};
+    npu_device_ctx.device_info_.npu_device_info_.frequency_ = 3;
+    context->device_list_.push_back(npu_device_ctx);
+  }
+
   context->thread_num_ = flags_->num_threads_;

   session_ = session::LiteSession::CreateSession(context.get());
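Outside the benchmark tool, application code enables the NPU the same way: push a DT_NPU device context before creating the session. A minimal sketch using the same API calls as the diff above; the frequency value simply mirrors the benchmark's default, and error handling is elided:

// Sketch: request NPU execution for a LiteSession. CPU remains the fallback,
// since the scheduler falls back per node when no NPU kernel is registered.
auto context = std::make_shared<mindspore::lite::Context>();
mindspore::lite::DeviceContext npu_device_ctx{mindspore::lite::DT_NPU};
npu_device_ctx.device_info_.npu_device_info_.frequency_ = 3;  // same value the benchmark uses
context->device_list_.push_back(npu_device_ctx);
auto *session = mindspore::session::LiteSession::CreateSession(context.get());
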
@@ -702,7 +708,7 @@ int Benchmark::Init() {
     return RET_ERROR;
   }

-  if (flags_->device_ != "CPU" && flags_->device_ != "GPU") {
+  if (flags_->device_ != "CPU" && flags_->device_ != "GPU" && flags_->device_ != "NPU") {
     MS_LOG(ERROR) << "Device type:" << flags_->device_ << " is not supported.";
     std::cerr << "Device type:" << flags_->device_ << " is not supported." << std::endl;
     return RET_ERROR;
@@ -59,7 +59,7 @@ class MS_API BenchmarkFlags : public virtual FlagParser {
   // common
   AddFlag(&BenchmarkFlags::model_file_, "modelFile", "Input model file", "");
   AddFlag(&BenchmarkFlags::in_data_file_, "inDataFile", "Input data file, if not set, use random input", "");
-  AddFlag(&BenchmarkFlags::device_, "device", "CPU | GPU", "CPU");
+  AddFlag(&BenchmarkFlags::device_, "device", "CPU | GPU | NPU", "CPU");
   AddFlag(&BenchmarkFlags::cpu_bind_mode_, "cpuBindMode",
           "Input 0 for NO_BIND, 1 for HIGHER_CPU, 2 for MID_CPU, default value: 1", 1);
   // MarkPerformance