add npu

parent c8dec08070
commit 65014d0988

build.sh | 30

@@ -23,7 +23,7 @@ usage()
 {
   echo "Usage:"
   echo "bash build.sh [-d] [-r] [-v] [-c on|off] [-t on|off] [-g on|off] [-h] [-b ge] [-m infer|train] \\"
-  echo "              [-a on|off] [-p on|off] [-i] [-L] [-R] [-D on|off] [-j[n]] [-e gpu|ascend|cpu] \\"
+  echo "              [-a on|off] [-p on|off] [-i] [-L] [-R] [-D on|off] [-j[n]] [-e gpu|ascend|cpu|npu] \\"
   echo "              [-P on|off] [-z [on|off]] [-M on|off] [-V 9.2|10.1|310|910] [-I arm64|arm32|x86_64] [-K] \\"
   echo "              [-B on|off] [-E] [-l on|off] [-n full|lite|off] [-T on|off] \\"
   echo "              [-A [cpp|java|object-c] [-C on|off] [-o on|off] [-S on|off] [-k on|off] [-W sse|neon|avx|off] \\"

@@ -45,7 +45,7 @@ usage()
   echo "    -i Enable increment building, default off"
   echo "    -L Enable load ANF-IR as input of 'infer', default off"
   echo "    -j[n] Set the threads when building (Default: -j8)"
-  echo "    -e Use cpu, gpu, ascend"
+  echo "    -e Use cpu, gpu, npu or ascend"
   echo "    -P Enable dump anf graph to file in ProtoBuffer format, default on"
   echo "    -D Enable dumping of function graph ir, default on"
   echo "    -z Compile dataset & mindrecord, default on"

@@ -121,7 +121,7 @@ checkopts()
   X86_64_SIMD="off"
   DEVICE_VERSION=""
   DEVICE=""
-
+  ENABLE_NPU="off"
   # Process the options
   while getopts 'drvj:c:t:hsb:a:g:p:ie:m:l:I:LRP:D:zM:V:K:swB:En:T:A:C:o:S:k:W:' opt
   do

@@ -379,6 +379,9 @@ parse_device()
       usage
       exit 1
     fi
+  elif [[ "X$DEVICE" == "Xnpu" ]]; then
+    ENABLE_NPU="on"
+    ENABLE_CPU="on"
   elif [[ "X$DEVICE" == "Xcpu" ]]; then
     ENABLE_CPU="on"
   elif [[ "X$DEVICE" == "X" ]]; then

@@ -497,6 +500,15 @@ checkndk() {
     fi
 }

+checkddk() {
+    if [ "${HWHIAI_DDK}" ]; then
+        echo -e "\e[31mHWHIAI_DDK=$HWHIAI_DDK \e[0m"
+    else
+        echo -e "\e[31mplease set HWHIAI_DDK in environment variable for example: export HWHIAI_DDK=/root/usr/hwhiai-ddk-100.500.010.010/ \e[0m"
+        exit 1
+    fi
+}
+
 gene_flatbuffer() {
     FLAT_DIR="${BASEPATH}/mindspore/lite/schema"
     cd ${FLAT_DIR} && rm -rf "${FLAT_DIR}/inner" && mkdir -p "${FLAT_DIR}/inner"

@@ -612,6 +624,9 @@ build_lite()
         echo "start build opencl"
         build_opencl
     fi
+    if [ "${ENABLE_NPU}" == "on" ]; then
+        checkddk
+    fi
     if [ "${RUN_TESTCASES}" == "on" ]; then
         build_gtest
     fi

@@ -634,7 +649,8 @@ build_lite()
           -DANDROID_STL=${ANDROID_STL} -DCMAKE_BUILD_TYPE=${BUILD_TYPE} -DSUPPORT_TRAIN=${SUPPORT_TRAIN} \
           -DPLATFORM_ARM64=on -DENABLE_NEON=on -DENABLE_FP16="off" \
           -DENABLE_TOOLS=${ENABLE_TOOLS} -DENABLE_CONVERTER=${ENABLE_CONVERTER} -DBUILD_TESTCASES=${RUN_TESTCASES} \
-          -DSUPPORT_GPU=${ENABLE_GPU} -DOFFLINE_COMPILE=${OPENCL_OFFLINE_COMPILE} -DBUILD_MINDDATA=${COMPILE_MINDDATA_LITE} \
+          -DSUPPORT_GPU=${ENABLE_GPU} -DSUPPORT_NPU=${ENABLE_NPU} \
+          -DOFFLINE_COMPILE=${OPENCL_OFFLINE_COMPILE} -DBUILD_MINDDATA=${COMPILE_MINDDATA_LITE} \
           -DCMAKE_INSTALL_PREFIX=${BASEPATH}/output/tmp -DMS_VERSION_MAJOR=${VERSION_MAJOR} \
           -DMS_VERSION_MINOR=${VERSION_MINOR} -DMS_VERSION_REVISION=${VERSION_REVISION} -DENABLE_VERBOSE=${ENABLE_VERBOSE} \
           "${BASEPATH}/mindspore/lite"

@@ -645,14 +661,16 @@ build_lite()
           -DANDROID_STL=${ANDROID_STL} -DCMAKE_BUILD_TYPE=${BUILD_TYPE} \
           -DPLATFORM_ARM32=on -DENABLE_NEON=on -DSUPPORT_TRAIN=${SUPPORT_TRAIN} \
           -DENABLE_TOOLS=${ENABLE_TOOLS} -DENABLE_CONVERTER=${ENABLE_CONVERTER} -DBUILD_TESTCASES=${RUN_TESTCASES} \
-          -DSUPPORT_GPU=${ENABLE_GPU} -DOFFLINE_COMPILE=${OPENCL_OFFLINE_COMPILE} -DBUILD_MINDDATA=${COMPILE_MINDDATA_LITE} \
+          -DSUPPORT_GPU=${ENABLE_GPU} -DSUPPORT_NPU=${ENABLE_NPU} \
+          -DOFFLINE_COMPILE=${OPENCL_OFFLINE_COMPILE} -DBUILD_MINDDATA=${COMPILE_MINDDATA_LITE} \
           -DCMAKE_INSTALL_PREFIX=${BASEPATH}/output/tmp -DMS_VERSION_MAJOR=${VERSION_MAJOR} \
           -DMS_VERSION_MINOR=${VERSION_MINOR} -DMS_VERSION_REVISION=${VERSION_REVISION} -DENABLE_VERBOSE=${ENABLE_VERBOSE} \
           "${BASEPATH}/mindspore/lite"
     else
         cmake -DPLATFORM_ARM64=off -DSUPPORT_TRAIN=${SUPPORT_TRAIN} \
           -DENABLE_TOOLS=${ENABLE_TOOLS} -DENABLE_CONVERTER=${ENABLE_CONVERTER} -DBUILD_TESTCASES=${RUN_TESTCASES} \
-          -DCMAKE_BUILD_TYPE=${BUILD_TYPE} -DSUPPORT_GPU=${ENABLE_GPU} -DBUILD_MINDDATA=${COMPILE_MINDDATA_LITE} \
+          -DCMAKE_BUILD_TYPE=${BUILD_TYPE} -DSUPPORT_GPU=${ENABLE_GPU} -DSUPPORT_NPU=${ENABLE_NPU} \
+          -DBUILD_MINDDATA=${COMPILE_MINDDATA_LITE} \
           -DOFFLINE_COMPILE=${OPENCL_OFFLINE_COMPILE} -DCMAKE_INSTALL_PREFIX=${BASEPATH}/output/tmp \
           -DMS_VERSION_MAJOR=${VERSION_MAJOR} -DMS_VERSION_MINOR=${VERSION_MINOR} -DMS_VERSION_REVISION=${VERSION_REVISION} \
           -DENABLE_VERBOSE=${ENABLE_VERBOSE} -DX86_64_SIMD=${X86_64_SIMD} "${BASEPATH}/mindspore/lite"

@@ -66,6 +66,11 @@ if (BUILD_MINDDATA STREQUAL "lite_cv")
 endif ()

 if (PLATFORM_ARM64)
+    if (SUPPORT_NPU)
+        install(FILES ${DDK_LIB_PATH}/libhiai.so DESTINATION ${MAIN_DIR}-${COMPONENT_NAME}/third_party/hiai_ddk/lib COMPONENT ${COMPONENT_NAME})
+        install(FILES ${DDK_LIB_PATH}/libhiai_ir.so DESTINATION ${MAIN_DIR}-${COMPONENT_NAME}/third_party/hiai_ddk/lib COMPONENT ${COMPONENT_NAME})
+        install(FILES ${DDK_LIB_PATH}/libhiai_ir_build.so DESTINATION ${MAIN_DIR}-${COMPONENT_NAME}/third_party/hiai_ddk/lib COMPONENT ${COMPONENT_NAME})
+    endif()
     if (SUPPORT_TRAIN)
         install(DIRECTORY ${TOP_DIR}/mindspore/lite/include/ DESTINATION ${INC_DIR} COMPONENT ${COMPONENT_NAME} FILES_MATCHING PATTERN "*.h")
     else ()

@@ -17,6 +17,7 @@ option(ENABLE_FP16 "if build fp16 ops" off)
 option(ENABLE_TOOLS "if build tools" on)
 option(BUILD_TESTCASES "if build testcase" on)
 option(SUPPORT_GPU "if support gpu" off)
+option(SUPPORT_NPU "if support npu" off)
 option(OFFLINE_COMPILE "if offline compile OpenCL kernel" off)
 option(BUILD_MINDDATA_EXAMPLE "" on)
 option(ENABLE_VERBOSE "" off)

@@ -39,14 +40,24 @@ if (PLATFORM_ARM64 OR PLATFORM_ARM32)
     set(CMAKE_FIND_ROOT_PATH_MODE_PACKAGE BOTH)
 endif()

 if (SUPPORT_GPU)
     set(PROCESS_UNIT gpu)
+elseif (SUPPORT_NPU)
+    set(PROCESS_UNIT npu)
 else ()
     set(PROCESS_UNIT cpu)
 endif ()

+if (SUPPORT_NPU)
+    set(DDK_PATH "$ENV{HWHIAI_DDK}/ddk/ai_ddk_lib")
+    if (PLATFORM_ARM64)
+        set(DDK_LIB_PATH ${DDK_PATH}/lib64)
+    elseif (PLATFORM_ARM32)
+        set(DDK_LIB_PATH ${DDK_PATH}/lib)
+    endif()
+    add_compile_definitions(SUPPORT_NPU)
+endif()
+
 add_compile_definitions(NO_DLIB)
 add_compile_options(-fPIC)

@ -61,7 +72,7 @@ if (SUPPORT_TRAIN)
|
|||
set(WIN_RUN_X86_NAME win-runtime-x86-${PROCESS_UNIT})
|
||||
else ()
|
||||
set(COMPONENT_NAME converter-ubuntu-train)
|
||||
endif()
|
||||
endif ()
|
||||
set(RUN_X86_COMPONENT_NAME runtime-x86-${PROCESS_UNIT}-train)
|
||||
else ()
|
||||
if (PLATFORM_ARM64)
|
||||
|
@ -74,13 +85,13 @@ else ()
|
|||
set(WIN_RUN_X86_NAME win-runtime-x86-${PROCESS_UNIT})
|
||||
else ()
|
||||
set(COMPONENT_NAME converter-ubuntu)
|
||||
endif()
|
||||
endif ()
|
||||
if ("${X86_64_SIMD}" STREQUAL "sse")
|
||||
set(RUN_X86_COMPONENT_NAME runtime-x86-${X86_64_SIMD}-${PROCESS_UNIT})
|
||||
else ()
|
||||
set(RUN_X86_COMPONENT_NAME runtime-x86-${PROCESS_UNIT})
|
||||
endif ()
|
||||
endif()
|
||||
endif ()
|
||||
|
||||
string(REPLACE "/mindspore/lite" "" TOP_DIR ${CMAKE_CURRENT_SOURCE_DIR})
|
||||
set(CORE_DIR ${TOP_DIR}/mindspore/core)
|
||||
|
@ -121,14 +132,14 @@ else ()
|
|||
if (NOT WIN32)
|
||||
set(CMAKE_SHARED_LINKER_FLAGS "-Wl,-z,relro,-z,now -Wl,-z,noexecstack ${CMAKE_SHARED_LINKER_FLAGS}")
|
||||
set(CMAKE_EXE_LINKER_FLAGS "-Wl,-z,relro,-z,now -Wl,-z,noexecstack ${CMAKE_EXE_LINKER_FLAGS}")
|
||||
endif()
|
||||
endif ()
|
||||
endif ()
|
||||
if (ENABLE_VERBOSE)
|
||||
set(CMAKE_VERBOSE_MAKEFILE on)
|
||||
endif ()
|
||||
if (SUPPORT_TRAIN)
|
||||
add_compile_definitions(SUPPORT_TRAIN)
|
||||
endif()
|
||||
endif ()
|
||||
if (ENABLE_NEON)
|
||||
add_compile_definitions(ENABLE_NEON)
|
||||
endif ()
|
||||
|
@ -155,7 +166,7 @@ endif ()
|
|||
if (ENABLE_CONVERTER)
|
||||
if (PLATFORM_ARM)
|
||||
MESSAGE(FATAL_ERROR "Cannot build converter in arm platform")
|
||||
endif()
|
||||
endif ()
|
||||
include_directories(${PYTHON_INCLUDE_DIRS})
|
||||
include(${TOP_DIR}/cmake/external_libs/eigen.cmake)
|
||||
include(${TOP_DIR}/cmake/external_libs/protobuf.cmake)
|
||||
|
@ -207,18 +218,18 @@ if (ENABLE_TOOLS)
|
|||
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/tools/benchmark)
|
||||
if (SUPPORT_TRAIN)
|
||||
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/tools/net_train)
|
||||
endif()
|
||||
endif()
|
||||
endif ()
|
||||
endif ()
|
||||
if (NOT WIN32)
|
||||
if (ENABLE_TOOLS)
|
||||
if (NOT PLATFORM_ARM32 AND NOT PLATFORM_ARM64)
|
||||
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/tools/schema_gen)
|
||||
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/tools/lib_cropper)
|
||||
endif ()
|
||||
endif()
|
||||
endif ()
|
||||
if (BUILD_TESTCASES)
|
||||
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/test)
|
||||
endif()
|
||||
endif ()
|
||||
endif ()
|
||||
|
||||
include(${TOP_DIR}/cmake/package_lite.cmake)
|
||||
|
|
|
@@ -48,10 +48,16 @@ typedef struct {
   bool enable_float16_ = false; /**< prior enable float16 inference */
 } GpuDeviceInfo;

+/// \brief NpuDeviceInfo defined for NPU's configuration information.
+typedef struct {
+  int frequency_ = 3; /**< npu frequency inference */
+} NpuDeviceInfo;
+
 /// \brief DeviceInfo defined for backend's configuration information.
 union DeviceInfo {
   CpuDeviceInfo cpu_device_info_;
   GpuDeviceInfo gpu_device_info_;
+  NpuDeviceInfo npu_device_info_;
 };

 /// \brief DeviceContext defined for holding backend's configuration information.

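Not part of the diff: a minimal sketch of how a caller could request NPU inference with the new NpuDeviceInfo, assuming the Context/DeviceContext layout implied by the surrounding hunks (device_list_, device_type_, DT_NPU). The function name and frequency value are illustrative only.

// Sketch only; assumes the Context/DeviceContext fields referenced elsewhere in this commit.
#include "include/context.h"
#include "include/lite_session.h"

mindspore::session::LiteSession *CreateNpuSession() {
  mindspore::lite::Context context;
  mindspore::lite::DeviceContext npu_device;
  npu_device.device_type_ = mindspore::lite::DT_NPU;
  npu_device.device_info_.npu_device_info_.frequency_ = 3;  // same default as NpuDeviceInfo
  context.device_list_.push_back(npu_device);
  // Whether NPU kernels are actually used is decided at runtime by
  // InnerContext::IsNpuEnabled() together with NPUManager::IsSupportNPU().
  return mindspore::session::LiteSession::CreateSession(&context);
}
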
@@ -88,6 +88,12 @@ else ()
     target_link_libraries(mindspore-lite cpu_kernel_mid nnacl cpu_ops_mid)
     target_link_libraries(mindspore-lite_static cpu_kernel_mid nnacl_mid cpu_ops_mid)
 endif ()
+if (SUPPORT_NPU)
+    add_subdirectory(runtime/agent/npu)
+    include_directories(${DDK_PATH})
+    target_link_libraries(mindspore-lite npu_kernel_mid)
+    target_link_libraries(mindspore-lite_static npu_kernel_mid)
+endif ()
 if (PLATFORM_ARM32 OR PLATFORM_ARM64)
     target_link_libraries(mindspore-lite log)
     target_link_libraries(mindspore-lite_static log)

@@ -17,6 +17,9 @@
 #include "src/inner_context.h"
 #include "include/errorcode.h"
 #include "src/common/log_adapter.h"
+#ifdef SUPPORT_NPU
+#include "src/runtime/agent/npu/npu_manager.h"
+#endif

 namespace mindspore::lite {
 InnerContext::InnerContext(const Context *context) {

@@ -74,10 +77,12 @@ int InnerContext::IsValid() {
     return RET_NOT_SUPPORT;
   }
 #endif
+#ifndef SUPPORT_NPU
   if (IsNpuEnabled()) {
     MS_LOG(ERROR) << "NPU is not supported.";
     return RET_NOT_SUPPORT;
   }
+#endif
   return RET_OK;
 }

@@ -108,9 +113,14 @@ bool InnerContext::IsGpuEnabled() {
 }

 bool InnerContext::IsNpuEnabled() {
+#ifdef SUPPORT_NPU
   return this->device_list_.end() !=
-         std::find_if(this->device_list_.begin(), this->device_list_.end(),
-                      [](const DeviceContext &device) { return device.device_type_ == DT_NPU; });
+           std::find_if(this->device_list_.begin(), this->device_list_.end(),
+                        [](const DeviceContext &device) { return device.device_type_ == DT_NPU; }) &&
+         mindspore::lite::NPUManager::GetInstance()->IsSupportNPU();
+#else
+  return false;
+#endif
 }

 CpuDeviceInfo InnerContext::GetCpuInfo() {

@@ -132,4 +142,15 @@ GpuDeviceInfo InnerContext::GetGpuInfo() {
     return iter->device_info_.gpu_device_info_;
   }
 }
+
+NpuDeviceInfo InnerContext::GetNpuInfo() const {
+  auto iter = std::find_if(this->device_list_.begin(), this->device_list_.end(),
+                           [](const DeviceContext &device) { return device.device_type_ == DT_NPU; });
+  if (iter == this->device_list_.end()) {
+    return {};
+  } else {
+    return iter->device_info_.npu_device_info_;
+  }
+}
+
 }  // namespace mindspore::lite

@@ -47,6 +47,8 @@ struct InnerContext : public Context {

   GpuDeviceInfo GetGpuInfo();

+  NpuDeviceInfo GetNpuInfo() const;
+
   int IsValid();

   virtual ~InnerContext();

@@ -27,7 +27,10 @@
 #include "src/common/graph_util.h"
 #include "src/kernel_registry.h"
 #include "src/model_common.h"
-#include "mindspore/lite/src/runtime/kernel/arm/base/dequant.h"
+#include "src/runtime/kernel/arm/base/dequant.h"
+#if SUPPORT_NPU
+#include "src/runtime/agent/npu/npu_manager.h"
+#endif

 namespace mindspore {
 namespace lite {

@@ -330,6 +333,14 @@ int LiteSession::CompileGraph(Model *model) {
     is_running_.store(false);
     return ret;
   }
+#if SUPPORT_NPU
+  if (this->context_->IsNpuEnabled()) {
+    if (mindspore::lite::NPUManager::GetInstance()->LoadOMModel() != RET_OK) {
+      MS_LOG(ERROR) << "NPU client load model failed.";
+      return RET_ERROR;
+    }
+  }
+#endif
   ret = executor_->Prepare(this->kernels_);
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "Prepare executor failed: " << ret;

@@ -410,19 +421,18 @@ int LiteSession::Init(const Context *context) {
     is_running_.store(false);
     return ret;
   }
-#if SUPPORT_GPU
-  if (this->context_->IsGpuEnabled()) {
-    auto gpu_device_info = this->context_->GetGpuInfo();
-    auto opencl_runtime = ocl_runtime_wrap_.GetInstance();
-    opencl_runtime->SetFp16Enable(gpu_device_info.enable_float16_);
-    if (opencl_runtime->Init() != RET_OK) {
-      this->context_->device_list_ = {{DT_CPU, {gpu_device_info.enable_float16_, MID_CPU}}};
-      MS_LOG(WARNING) << "Init OpenCL runtime failed, change to CPU mode.";
-    } else {
-      MS_LOG(INFO) << "Init OpenCL runtime success.";
-    }
-  }
-#endif
+  ret = InitGPURuntime();
+  if (ret != RET_OK) {
+    MS_LOG(ERROR) << "Init GPU runtime failed.";
+    is_running_.store(false);
+    return ret;
+  }
+  ret = InitNPURuntime();
+  if (ret != RET_OK) {
+    MS_LOG(ERROR) << "Init NPU runtime failed.";
+    is_running_.store(false);
+    return ret;
+  }
   executor_ = new (std::nothrow) Executor();
   if (nullptr == executor_) {
     MS_LOG(ERROR) << "New Executor failed";

@@ -573,6 +583,35 @@ int LiteSession::Resize(const std::vector<mindspore::tensor::MSTensor *> &inputs
   is_running_.store(false);
   return RET_OK;
 }
+
+int LiteSession::InitNPURuntime() {
+#if SUPPORT_NPU
+  if (this->context_->IsNpuEnabled()) {
+    if (mindspore::lite::NPUManager::GetInstance()->InitClient() != RET_OK) {
+      MS_LOG(ERROR) << "NPU client init error.";
+      return RET_ERROR;
+    }
+  }
+#endif
+  return RET_OK;
+}
+
+int LiteSession::InitGPURuntime() {
+#if SUPPORT_GPU
+  if (this->context_->IsGpuEnabled()) {
+    auto gpu_device_info = this->context_->GetGpuInfo();
+    auto opencl_runtime = ocl_runtime_wrap_.GetInstance();
+    opencl_runtime->SetFp16Enable(gpu_device_info.enable_float16_);
+    if (opencl_runtime->Init() != RET_OK) {
+      this->context_->device_list_ = {{DT_CPU, {gpu_device_info.enable_float16_, MID_CPU}}};
+      MS_LOG(WARNING) << "Init OpenCL runtime failed, change to CPU mode.";
+    } else {
+      MS_LOG(INFO) << "Init OpenCL runtime success.";
+    }
+  }
+#endif
+  return RET_OK;
+}
 }  // namespace lite

 session::LiteSession *session::LiteSession::CreateSession(const lite::Context *context) {

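As a reading aid (not part of the commit), the session-level call order these hunks establish can be summarized as follows:

// Condensed flow implied by the hunks above; not literal code from the diff.
// LiteSession::Init(context)
//   -> InitGPURuntime()  // no-op unless SUPPORT_GPU is compiled in and the context enables GPU
//   -> InitNPURuntime()  // calls NPUManager::GetInstance()->InitClient() when NPU is enabled
// LiteSession::CompileGraph(model)
//   -> schedule kernels; NPU subgraphs queue their OM models via NPUManager::AddModel()
//      in SubGraphNpuKernel::Init() (added later in this commit)
//   -> NPUManager::GetInstance()->LoadOMModel()  // loads every queued model into the HiAI client
//   -> executor_->Prepare(kernels_)
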
@@ -96,6 +96,10 @@ class LiteSession : public session::LiteSession {
  private:
   void ResetInputsShape(const std::vector<std::vector<int>> &dims);

+  int InitNPURuntime();
+
+  int InitGPURuntime();
+
  protected:
   InnerContext *context_ = nullptr;
   std::vector<kernel::LiteKernel *> kernels_;

@@ -0,0 +1,23 @@
+include_directories(${DDK_PATH})
+include_directories(${CMAKE_CURRENT_SOURCE_DIR}/)
+include_directories(${CMAKE_CURRENT_SOURCE_DIR}/kernel)
+file(GLOB_RECURSE NPU_RUNTIME_SRC
+        ${CMAKE_CURRENT_SOURCE_DIR}/*.cc
+        ${CMAKE_CURRENT_SOURCE_DIR}/../../kernel/npu/*.cc
+        )
+add_library(hiai SHARED IMPORTED)
+set_target_properties(hiai PROPERTIES IMPORTED_LOCATION
+        ${DDK_LIB_PATH}/libhiai.so)
+add_library(hiai_ir SHARED IMPORTED)
+set_target_properties(hiai_ir PROPERTIES IMPORTED_LOCATION
+        ${DDK_LIB_PATH}/libhiai_ir.so)
+add_library(hiai_ir_build SHARED IMPORTED)
+set_target_properties(hiai_ir_build PROPERTIES IMPORTED_LOCATION
+        ${DDK_LIB_PATH}/libhiai_ir_build.so)
+add_library(npu_kernel_mid OBJECT ${NPU_RUNTIME_SRC})
+target_link_libraries(
+        npu_kernel_mid
+        hiai
+        hiai_ir
+        hiai_ir_build
+        )

@ -0,0 +1,160 @@
|
|||
/**
|
||||
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "src/runtime/agent/npu/npu_converter_utils.h"
|
||||
namespace mindspore::lite {
|
||||
ge::Shape ConverterToNPUShape(const std::vector<int> &src_shape) {
|
||||
vector<int64_t> shapes;
|
||||
shapes.reserve(src_shape.size());
|
||||
for (int i = 0; i < src_shape.size(); i++) {
|
||||
shapes.push_back(src_shape[i]);
|
||||
}
|
||||
return ge::Shape({shapes});
|
||||
}
|
||||
ge::Format ConverterToNPUFormat(schema::Format format) {
|
||||
ge::Format ge_format;
|
||||
switch (format) {
|
||||
case schema::Format_NCHW:
|
||||
ge_format = ge::FORMAT_NCHW;
|
||||
break;
|
||||
case schema::Format_NHWC:
|
||||
ge_format = ge::FORMAT_NHWC;
|
||||
break;
|
||||
default:
|
||||
MS_LOG(ERROR) << "Unsupported format:" << format;
|
||||
// use unused format to indicate errors.
|
||||
ge_format = ge::FORMAT_ND;
|
||||
break;
|
||||
}
|
||||
return ge_format;
|
||||
}
|
||||
|
||||
ge::DataType ConverterToNPUDataType(TypeId type_id) {
|
||||
ge::DataType data_type;
|
||||
switch (type_id) {
|
||||
case kNumberTypeFloat:
|
||||
case kNumberTypeFloat32:
|
||||
data_type = ge::DT_FLOAT;
|
||||
break;
|
||||
case kNumberTypeFloat16:
|
||||
data_type = ge::DT_FLOAT16;
|
||||
break;
|
||||
case kNumberTypeInt8:
|
||||
data_type = ge::DT_INT8;
|
||||
break;
|
||||
case kNumberTypeUInt8:
|
||||
data_type = ge::DT_UINT8;
|
||||
break;
|
||||
case kNumberTypeInt16:
|
||||
data_type = ge::DT_INT16;
|
||||
break;
|
||||
case kNumberTypeInt32:
|
||||
data_type = ge::DT_INT32;
|
||||
break;
|
||||
case kNumberTypeUInt32:
|
||||
data_type = ge::DT_UINT32;
|
||||
break;
|
||||
default:
|
||||
data_type = ge::DT_UNDEFINED;
|
||||
break;
|
||||
}
|
||||
return data_type;
|
||||
}
|
||||
hiai::op::Data *ConverterToNPUData(Tensor *src, const std::string &name) {
|
||||
auto data = new (std::nothrow) hiai::op::Data(name);
|
||||
if (data == nullptr) {
|
||||
MS_LOG(ERROR) << "new data failed.";
|
||||
return data;
|
||||
}
|
||||
ge::TensorDesc tensor_desc(ConverterToNPUShape(src->shape()), ConverterToNPUFormat(src->format()),
|
||||
ConverterToNPUDataType(src->data_type()));
|
||||
data->update_input_desc_x(tensor_desc);
|
||||
return data;
|
||||
}
|
||||
|
||||
std::shared_ptr<ge::Tensor> ConverterToNPUTensor(Tensor *src) {
|
||||
std::shared_ptr<ge::Tensor> ge_tensor = std::shared_ptr<ge::Tensor>(new (std::nothrow) ge::Tensor());
|
||||
if (ge_tensor == nullptr) {
|
||||
MS_LOG(ERROR) << "new ge_tensor failed.";
|
||||
return ge_tensor;
|
||||
}
|
||||
ge::TensorDesc tensor_desc(ConverterToNPUShape(src->shape()), ConverterToNPUFormat(src->format()),
|
||||
ConverterToNPUDataType(src->data_type()));
|
||||
|
||||
ge_tensor->SetTensorDesc(tensor_desc);
|
||||
|
||||
if (src->data_c() != nullptr) {
|
||||
ge_tensor->SetData(reinterpret_cast<const uint8_t *>(src->data_c()), src->Size());
|
||||
}
|
||||
return ge_tensor;
|
||||
}
|
||||
/*
|
||||
* mode : Activation mode, with options as follows:
|
||||
* 0 : Sigmoid
|
||||
* 1 : ReLU
|
||||
* 2 : Tanh
|
||||
* 3 : Clipped ReLU
|
||||
* 4 : ELU
|
||||
* 5 : PReLU
|
||||
* 6 : Abs
|
||||
* 7 : Relu1
|
||||
* 8 : Softsign
|
||||
* 9 : Softplus
|
||||
* 10 : Hardsigmoid
|
||||
* 11 : Threshold ReLU
|
||||
* 12 : Selu
|
||||
* 13 : Linear
|
||||
* 14 : Relu6
|
||||
* 15 : GeLU.
|
||||
*/
|
||||
int ConverterToNPUActMode(schema::ActivationType type) {
|
||||
switch (type) {
|
||||
case schema::ActivationType_NO_ACTIVATION:
|
||||
return -1;
|
||||
case schema::ActivationType_SIGMOID:
|
||||
return 0;
|
||||
case schema::ActivationType_RELU:
|
||||
return 1;
|
||||
case schema::ActivationType_TANH:
|
||||
return 2;
|
||||
case schema::ActivationType_ELU:
|
||||
return 4;
|
||||
case schema::ActivationType_LEAKY_RELU:
|
||||
return 5;
|
||||
case schema::ActivationType_ABS:
|
||||
return 6;
|
||||
case schema::ActivationType_RELU1:
|
||||
return 7;
|
||||
case schema::ActivationType_SOFTSIGN:
|
||||
return 8;
|
||||
case schema::ActivationType_SOFTPLUS:
|
||||
return 9;
|
||||
case schema::ActivationType_HSIGMOID:
|
||||
return 10;
|
||||
case schema::ActivationType_THRESHOLDRELU:
|
||||
return 11;
|
||||
case schema::ActivationType_SELU:
|
||||
return 12;
|
||||
case schema::ActivationType_LINEAR:
|
||||
return 13;
|
||||
case schema::ActivationType_RELU6:
|
||||
return 14;
|
||||
default:
|
||||
MS_LOG(ERROR) << "Unsupport activation type to NPU." << type;
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
} // namespace mindspore::lite
|
|
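Not part of the diff: a hedged sketch of how these helpers are intended to be used when feeding a lite Tensor into a HiAI graph, mirroring the pattern SubGraphNpuKernel::BuildNPUInputOp() uses later in this commit. The function and node names here are placeholders.

// Sketch, assuming a valid mindspore::lite::Tensor *lite_tensor.
#include "src/runtime/agent/npu/npu_converter_utils.h"
#include "include/graph/op/all_ops.h"

ge::Operator *WrapTensorForNpu(mindspore::lite::Tensor *lite_tensor, bool is_graph_input) {
  if (is_graph_input) {
    // Graph inputs become hiai::op::Data nodes whose TensorDesc is derived from the lite Tensor.
    return mindspore::lite::ConverterToNPUData(lite_tensor, "graph_input_0");
  }
  // Constant weights become hiai::op::Const nodes carrying the converted ge::Tensor.
  auto *weight_const = new (std::nothrow) hiai::op::Const("weight_0");
  if (weight_const == nullptr) {
    return nullptr;
  }
  weight_const->set_attr_value(mindspore::lite::ConverterToNPUTensor(lite_tensor));
  return weight_const;
}
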
@ -0,0 +1,42 @@
|
|||
/**
|
||||
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_NPU_CONVERTER_UITLS_H_
|
||||
#define MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_NPU_CONVERTER_UITLS_H_
|
||||
#include <string>
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
#include "schema/ops_generated.h"
|
||||
#include "include/graph/tensor.h"
|
||||
#include "include/graph/op/array_defs.h"
|
||||
#include "src/tensor.h"
|
||||
|
||||
namespace mindspore::lite {
|
||||
|
||||
std::shared_ptr<ge::Tensor> ConverterToNPUTensor(Tensor *src);
|
||||
|
||||
hiai::op::Data *ConverterToNPUData(Tensor *src, const std::string &name);
|
||||
|
||||
ge::Format ConverterToNPUFormat(schema::Format format);
|
||||
|
||||
ge::DataType ConverterToNPUDataType(TypeId type_id);
|
||||
|
||||
ge::Shape ConverterToNPUShape(const std::vector<int> &src_shape);
|
||||
|
||||
int ConverterToNPUActMode(schema::ActivationType type);
|
||||
|
||||
} // namespace mindspore::lite
|
||||
#endif // MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_NPU_CONVERTER_UITLS_H_
|
|
@ -0,0 +1,131 @@
|
|||
/**
|
||||
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "src/runtime/agent/npu/npu_executor.h"
|
||||
#include "include/errorcode.h"
|
||||
#include "src/runtime/agent/npu/npu_manager.h"
|
||||
|
||||
namespace mindspore::lite {
|
||||
int NPUExecutor::Prepare(const std::vector<kernel::LiteKernel *> &kernels) {
|
||||
this->client_ = mindspore::lite::NPUManager::GetInstance()->GetClient();
|
||||
if (this->client_ == nullptr) {
|
||||
MS_LOG(ERROR) << "client is nullptr.";
|
||||
return RET_ERROR;
|
||||
}
|
||||
if (GetIOTensorVec() != RET_OK) {
|
||||
MS_LOG(ERROR) << "Load model failed.";
|
||||
return RET_ERROR;
|
||||
}
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
int NPUExecutor::Run(std::vector<Tensor *> &in_tensors, std::vector<Tensor *> &out_tensors,
|
||||
std::vector<kernel::LiteKernel *> &kernels, Allocator *allocator, const KernelCallBack &before,
|
||||
const KernelCallBack &after) {
|
||||
hiai::AiContext context;
|
||||
for (int i = 0; i < npu_input_tensors_.size(); ++i) {
|
||||
memcpy(npu_input_tensors_[i]->GetBuffer(), in_tensors[i]->data_c(), in_tensors[i]->Size());
|
||||
}
|
||||
context.AddPara("model_name", model_name_);
|
||||
if (this->client_ == nullptr) {
|
||||
MS_LOG(ERROR) << "NPU client is nullptr";
|
||||
return RET_ERROR;
|
||||
}
|
||||
int stamp;
|
||||
int ret = this->client_->Process(context, this->npu_input_tensors_, this->npu_output_tensors_, 1000, stamp);
|
||||
if (ret != hiai::AI_SUCCESS) {
|
||||
MS_LOG(ERROR) << "NPU Process failed. code is " << ret;
|
||||
return RET_ERROR;
|
||||
}
|
||||
|
||||
for (int i = 0; i < npu_output_tensors_.size(); ++i) {
|
||||
memcpy(out_tensors[i]->MutableData(), npu_output_tensors_[i]->GetBuffer(), npu_output_tensors_[i]->GetSize());
|
||||
}
|
||||
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
int NPUExecutor::GetIOTensorVec() {
|
||||
std::vector<hiai::TensorDimension> input_dimension;
|
||||
std::vector<hiai::TensorDimension> output_dimension;
|
||||
input_dimension.clear();
|
||||
output_dimension.clear();
|
||||
if (this->client_ == nullptr) {
|
||||
MS_LOG(ERROR) << "client is nullptr.";
|
||||
return RET_ERROR;
|
||||
}
|
||||
auto ret = this->client_->GetModelIOTensorDim(model_name_, input_dimension, output_dimension);
|
||||
if (ret != hiai::AI_SUCCESS) {
|
||||
MS_LOG(ERROR) << "Get model input and output tensor dims failed." << ret;
|
||||
return RET_ERROR;
|
||||
}
|
||||
ret = UpdateInputTensorVec(input_dimension);
|
||||
if (ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "Update input tensor vector failed. " << ret;
|
||||
return RET_ERROR;
|
||||
}
|
||||
ret = UpdateOutputTensorVec(output_dimension);
|
||||
if (ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "Update output tensor vector failed. " << ret;
|
||||
return RET_ERROR;
|
||||
}
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
int NPUExecutor::UpdateInputTensorVec(const std::vector<hiai::TensorDimension> &input_dimension) {
|
||||
if (input_dimension.empty()) {
|
||||
MS_LOG(ERROR) << "npu input tensor dimension is empty.";
|
||||
return RET_ERROR;
|
||||
}
|
||||
npu_input_tensors_.resize(input_dimension.size());
|
||||
npu_input_tensors_.clear();
|
||||
for (const auto &inDim : input_dimension) {
|
||||
std::shared_ptr<hiai::AiTensor> input = std::make_shared<hiai::AiTensor>();
|
||||
if (input->Init(&inDim) != hiai::AI_SUCCESS) {
|
||||
MS_LOG(ERROR) << "Input AiTensor init failed.";
|
||||
return RET_ERROR;
|
||||
}
|
||||
npu_input_tensors_.push_back(input);
|
||||
}
|
||||
if (npu_input_tensors_.empty()) {
|
||||
MS_LOG(ERROR) << "NPU input tensor is empty.";
|
||||
return RET_ERROR;
|
||||
}
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
int NPUExecutor::UpdateOutputTensorVec(const std::vector<hiai::TensorDimension> &output_dimension) {
|
||||
if (output_dimension.empty()) {
|
||||
MS_LOG(ERROR) << "output_dimension_ is empty.";
|
||||
return RET_ERROR;
|
||||
}
|
||||
npu_output_tensors_.resize(output_dimension.size());
|
||||
npu_output_tensors_.clear();
|
||||
for (const auto &outDim : output_dimension) {
|
||||
std::shared_ptr<hiai::AiTensor> output = std::make_shared<hiai::AiTensor>();
|
||||
int ret = output->Init(&outDim);
|
||||
if (ret != hiai::AI_SUCCESS) {
|
||||
return RET_ERROR;
|
||||
}
|
||||
npu_output_tensors_.push_back(output);
|
||||
}
|
||||
if (npu_output_tensors_.empty()) {
|
||||
MS_LOG(ERROR) << "NPU output tensor is empty.";
|
||||
return RET_ERROR;
|
||||
}
|
||||
return RET_OK;
|
||||
}
|
||||
} // namespace mindspore::lite
|
|
@ -0,0 +1,52 @@
|
|||
/**
|
||||
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_NPU_EXECUTOR_H_
|
||||
#define MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_NPU_EXECUTOR_H_
|
||||
#include <string>
|
||||
#include <memory>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
#include "src/executor.h"
|
||||
#include "include/errorcode.h"
|
||||
#include "include/HiAiModelManagerService.h"
|
||||
|
||||
namespace mindspore::lite {
|
||||
class NPUExecutor : public Executor {
|
||||
public:
|
||||
explicit NPUExecutor(const std::string &model_name) { this->model_name_ = model_name; }
|
||||
~NPUExecutor() override = default;
|
||||
int Prepare(const std::vector<kernel::LiteKernel *> &kernels) override;
|
||||
|
||||
int Run(std::vector<Tensor *> &in_tensors, std::vector<Tensor *> &out_tensors,
|
||||
std::vector<kernel::LiteKernel *> &kernels, Allocator *allocator = nullptr,
|
||||
const KernelCallBack &before = nullptr, const KernelCallBack &after = nullptr) override;
|
||||
|
||||
private:
|
||||
int GetIOTensorVec();
|
||||
|
||||
int UpdateInputTensorVec(const std::vector<hiai::TensorDimension> &input_dimension);
|
||||
|
||||
int UpdateOutputTensorVec(const std::vector<hiai::TensorDimension> &output_dimension);
|
||||
|
||||
private:
|
||||
std::string model_name_;
|
||||
std::shared_ptr<hiai::AiModelMngerClient> client_ = nullptr;
|
||||
std::vector<std::shared_ptr<hiai::AiTensor>> npu_input_tensors_;
|
||||
std::vector<std::shared_ptr<hiai::AiTensor>> npu_output_tensors_;
|
||||
};
|
||||
} // namespace mindspore::lite
|
||||
#endif // MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_NPU_EXECUTOR_H_
|
|
@ -0,0 +1,217 @@
|
|||
/**
|
||||
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "src/runtime/agent/npu/npu_manager.h"
|
||||
#include <sys/fcntl.h>
|
||||
#include <unistd.h>
|
||||
#include "include/hiai_ir_build.h"
|
||||
#include "include/HiAiModelManagerService.h"
|
||||
#include "include/errorcode.h"
|
||||
#include "include/graph/op/all_ops.h"
|
||||
#include "src/common/file_utils.h"
|
||||
|
||||
namespace mindspore::lite {
|
||||
|
||||
bool NPUManager::IsSupportNPU() {
|
||||
if (!is_npu_check_executor) {
|
||||
CheckSupportNPU();
|
||||
}
|
||||
if (is_support_npu) {
|
||||
MS_LOG(INFO) << "The current device support NPU.";
|
||||
return true;
|
||||
} else {
|
||||
MS_LOG(INFO) << "The current device NOT SUPPORT NPU.";
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
std::string NPUManager::GetExecutorPath() {
|
||||
std::string executor_path;
|
||||
char cmdline[1024] = {0};
|
||||
int fd = open("/proc/self/cmdline", O_RDONLY);
|
||||
if (fd >= 0) {
|
||||
char ch;
|
||||
int i = 0;
|
||||
while (read(fd, &ch, sizeof(ch)) > 0 && !isspace(ch)) {
|
||||
if (':' == ch) {
|
||||
break;
|
||||
}
|
||||
cmdline[i] = ch;
|
||||
i++;
|
||||
}
|
||||
close(fd);
|
||||
}
|
||||
executor_path = std::string(cmdline);
|
||||
if (executor_path.empty()) {
|
||||
executor_path = "./";
|
||||
}
|
||||
// android
|
||||
if (executor_path.substr(0, 11) == "/data/data/") {
|
||||
executor_path = executor_path + '/';
|
||||
} else {
|
||||
// Linux
|
||||
executor_path = executor_path.substr(0, executor_path.rfind('/')) + "/";
|
||||
}
|
||||
return executor_path;
|
||||
}
|
||||
|
||||
bool NPUManager::IsKirinChip() {
|
||||
std::ifstream cpu_info("/proc/cpuinfo");
|
||||
if (!(cpu_info.good() && cpu_info.is_open())) {
|
||||
return false;
|
||||
}
|
||||
std::string line;
|
||||
while (!cpu_info.eof()) {
|
||||
getline(cpu_info, line);
|
||||
if (line.find("Hardware") == string::npos) {
|
||||
continue;
|
||||
}
|
||||
auto index = line.find("Kirin");
|
||||
if (index == string::npos) {
|
||||
continue;
|
||||
}
|
||||
auto kirin_number_str = line.substr(index + 5);
|
||||
auto kirin_number = atoi(kirin_number_str.c_str());
|
||||
if (kirin_number >= 985 || kirin_number == 810 || kirin_number == 820) {
|
||||
cpu_info.close();
|
||||
return true;
|
||||
} else {
|
||||
cpu_info.close();
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
bool WriteToOMFile(domi::ModelBufferData om_model_buff, const std::string &om_file_path) {
|
||||
FILE *fp;
|
||||
fp = fopen(om_file_path.c_str(), "wb");
|
||||
if (fp == nullptr) {
|
||||
MS_LOG(ERROR) << om_file_path.c_str() << " open failed.";
|
||||
return false;
|
||||
}
|
||||
|
||||
auto write_size = (uint32_t)fwrite(om_model_buff.data, 1, om_model_buff.length, fp);
|
||||
if (write_size != om_model_buff.length) {
|
||||
fclose(fp);
|
||||
MS_LOG(ERROR) << "Write om file failed.";
|
||||
return false;
|
||||
}
|
||||
fclose(fp);
|
||||
return true;
|
||||
}
|
||||
|
||||
bool NPUManager::CheckOmBuildIr(const std::string &path) {
|
||||
// build test om model
|
||||
std::shared_ptr<hiai::op::Add> add_op(new (std::nothrow) hiai::op::Add("add"));
|
||||
if (add_op == nullptr) {
|
||||
MS_LOG(ERROR) << "new add_op failed.";
|
||||
return false;
|
||||
}
|
||||
ge::TensorDesc desc(ge::Shape({1}), ge::FORMAT_NCHW, ge::DT_FLOAT);
|
||||
std::shared_ptr<hiai::op::Data> data = std::make_shared<hiai::op::Data>("data");
|
||||
data->update_input_desc_x(desc);
|
||||
add_op->set_input_x1(*data);
|
||||
add_op->set_input_x2(*data);
|
||||
domi::HiaiIrBuild ir_build;
|
||||
ge::Graph ir_graph("graph");
|
||||
std::vector<ge::Operator> inputs{*data, *data};
|
||||
std::vector<ge::Operator> outputs{*add_op};
|
||||
ir_graph.SetInputs(inputs).SetOutputs(outputs);
|
||||
ge::Model om_model("test_model", "test_version");
|
||||
om_model.SetGraph(ir_graph);
|
||||
|
||||
domi::ModelBufferData om_model_buff;
|
||||
if (!ir_build.CreateModelBuff(om_model, om_model_buff)) {
|
||||
MS_LOG(ERROR) << "Create model buffer failed.";
|
||||
return false;
|
||||
}
|
||||
if (!ir_build.BuildIRModel(om_model, om_model_buff)) {
|
||||
MS_LOG(ERROR) << "Build IR model failed.";
|
||||
return false;
|
||||
}
|
||||
|
||||
// save test om model
|
||||
remove(path.c_str());
|
||||
bool ret = WriteToOMFile(om_model_buff, path);
|
||||
ir_build.ReleaseModelBuff(om_model_buff);
|
||||
return ret;
|
||||
}
|
||||
|
||||
void NPUManager::CheckSupportNPU() {
|
||||
is_npu_check_executor = true;
|
||||
std::string path_string = GetExecutorPath();
|
||||
|
||||
std::string test_model_path = path_string + "/mindspore_lite_test_npu.om";
|
||||
std::ifstream ifs(test_model_path);
|
||||
if (ifs.good() && ifs.is_open()) {
|
||||
ifs.close();
|
||||
is_support_npu = true;
|
||||
return;
|
||||
}
|
||||
if (!IsKirinChip()) {
|
||||
MS_LOG(ERROR) << "The current device chip NOT SUPPORT NPU";
|
||||
is_support_npu = false;
|
||||
return;
|
||||
}
|
||||
|
||||
if (!CheckOmBuildIr(test_model_path)) {
|
||||
MS_LOG(ERROR) << "Build OM IR error.";
|
||||
is_support_npu = false;
|
||||
return;
|
||||
}
|
||||
is_support_npu = true;
|
||||
}
|
||||
|
||||
int NPUManager::AddModel(void *model_buf, uint32_t size, const std::string &model_name, int frequency) {
|
||||
hiai::MemBuffer *buffer = mc_builder_->InputMemBufferCreate(model_buf, size);
|
||||
if (buffer == nullptr) {
|
||||
MS_LOG(ERROR) << "MemBuffer is null.";
|
||||
return RET_ERROR;
|
||||
}
|
||||
|
||||
auto desc = std::make_shared<hiai::AiModelDescription>(model_name, frequency, 0, 0, 0);
|
||||
desc->SetModelBuffer(buffer->GetMemBufferData(), buffer->GetMemBufferSize());
|
||||
model_desc_.push_back(desc);
|
||||
mc_builder_->MemBufferDestroy(buffer);
|
||||
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
int NPUManager::InitClient() {
|
||||
this->client_ = std::make_shared<hiai::AiModelMngerClient>();
|
||||
if (this->client_ == nullptr) {
|
||||
return RET_ERROR;
|
||||
}
|
||||
int ret = this->client_->Init(nullptr);
|
||||
if (ret != hiai::AI_SUCCESS) {
|
||||
return RET_ERROR;
|
||||
}
|
||||
mc_builder_ = std::make_shared<hiai::AiModelBuilder>(this->client_);
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
int NPUManager::LoadOMModel() {
|
||||
int ret = this->client_->Load(model_desc_);
|
||||
if (ret != hiai::AI_SUCCESS) {
|
||||
MS_LOG(ERROR) << "Client load model failed." << ret;
|
||||
return RET_ERROR;
|
||||
}
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
std::shared_ptr<hiai::AiModelMngerClient> NPUManager::GetClient() { return client_; }
|
||||
} // namespace mindspore::lite
|
|
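Not part of the diff: a minimal sketch of the call sequence the manager expects, pieced together from this file and the LiteSession changes above. The buffer, size, and model name are placeholders.

// Sketch of the NPUManager call order implied by this commit.
#include "include/errorcode.h"
#include "src/runtime/agent/npu/npu_manager.h"

int PrepareNpuModel(void *om_buf, uint32_t om_size) {
  auto *manager = mindspore::lite::NPUManager::GetInstance();
  if (!manager->IsSupportNPU()) {  // Kirin-chip and DDK probe, cached after the first call
    return mindspore::lite::RET_ERROR;
  }
  if (manager->InitClient() != mindspore::lite::RET_OK) {  // done once in LiteSession::InitNPURuntime()
    return mindspore::lite::RET_ERROR;
  }
  // Each NPU subgraph registers its freshly built OM model with a name and frequency ...
  if (manager->AddModel(om_buf, om_size, "npu_subgraph_0.om", 3) != mindspore::lite::RET_OK) {
    return mindspore::lite::RET_ERROR;
  }
  // ... and LiteSession::CompileGraph() later loads everything in one shot.
  return manager->LoadOMModel();
}
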
@ -0,0 +1,68 @@
|
|||
/**
|
||||
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_NPU_UTILS_H_
|
||||
#define MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_NPU_UTILS_H_
|
||||
#include <string>
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
#include "include/HiAiModelManagerService.h"
|
||||
|
||||
namespace mindspore::lite {
|
||||
|
||||
class NPUManager {
|
||||
public:
|
||||
static NPUManager *GetInstance() {
|
||||
static NPUManager npuManager;
|
||||
return &npuManager;
|
||||
}
|
||||
|
||||
bool IsSupportNPU();
|
||||
|
||||
int InitClient();
|
||||
|
||||
// provide to subgraph to add model.
|
||||
int AddModel(void *model_buf, uint32_t size, const std::string &model_name, int frequency);
|
||||
|
||||
// scheduler to load om model.
|
||||
int LoadOMModel();
|
||||
|
||||
// provide to executor.
|
||||
std::shared_ptr<hiai::AiModelMngerClient> GetClient();
|
||||
|
||||
private:
|
||||
void CheckSupportNPU();
|
||||
|
||||
bool IsKirinChip();
|
||||
|
||||
bool CheckOmBuildIr(const std::string &path);
|
||||
|
||||
std::string GetExecutorPath();
|
||||
|
||||
private:
|
||||
bool is_npu_check_executor = false;
|
||||
|
||||
bool is_support_npu = false;
|
||||
|
||||
std::shared_ptr<hiai::AiModelMngerClient> client_ = nullptr;
|
||||
|
||||
std::vector<std::shared_ptr<hiai::AiModelDescription>> model_desc_;
|
||||
|
||||
std::shared_ptr<hiai::AiModelBuilder> mc_builder_ = nullptr;
|
||||
};
|
||||
|
||||
} // namespace mindspore::lite
|
||||
#endif // MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_NPU_UTILS_H_
|
|
@ -0,0 +1,187 @@
|
|||
/**
|
||||
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "src/runtime/agent/npu/subgraph_npu_kernel.h"
|
||||
#include "include/errorcode.h"
|
||||
#include "src/runtime/agent/npu/npu_executor.h"
|
||||
#include "include/graph/operator.h"
|
||||
#include "include/graph/graph.h"
|
||||
#include "src/tensor.h"
|
||||
#include "include/graph/model.h"
|
||||
#include "include/hiai_ir_build.h"
|
||||
#include "include/HiAiModelManagerService.h"
|
||||
#include "include/HiAiModelManagerType.h"
|
||||
#include "include/context.h"
|
||||
#include "include/version.h"
|
||||
#include "include/graph/op/array_defs.h"
|
||||
#include "src/common/file_utils.h"
|
||||
#include "src/common/common.h"
|
||||
#include "src/common/utils.h"
|
||||
#include "src/runtime/agent/npu/npu_converter_utils.h"
|
||||
#include "mindspore/lite/src/runtime/kernel/npu/npu_kernel.h"
|
||||
#include "src/runtime/agent/npu/npu_manager.h"
|
||||
namespace mindspore::kernel {
|
||||
using mindspore::lite::RET_ERROR;
|
||||
using mindspore::lite::RET_OK;
|
||||
|
||||
domi::ModelBufferData *SubGraphNpuKernel::BuildIRModel() {
|
||||
ge::Graph graph("NPUGraph");
|
||||
|
||||
auto ret = BuildNPUInputOp();
|
||||
if (ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "Build NPU input operator failed.";
|
||||
return nullptr;
|
||||
}
|
||||
ret = BuildNPUOutputOp();
|
||||
if (ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "Build NPU output operator failed.";
|
||||
return nullptr;
|
||||
}
|
||||
graph.SetInputs(subgraph_input_op_).SetOutputs(subgraph_output_op_);
|
||||
ge::Model model(GetOMModelName(), mindspore::lite::Version());
|
||||
model.SetGraph(graph);
|
||||
domi::HiaiIrBuild ir_build;
|
||||
auto om_model_buff = new (std::nothrow) domi::ModelBufferData;
|
||||
if (om_model_buff == nullptr) {
|
||||
MS_LOG(ERROR) << "om model buffer is nullptr.";
|
||||
return nullptr;
|
||||
}
|
||||
if (!ir_build.CreateModelBuff(model, *om_model_buff)) {
|
||||
MS_LOG(ERROR) << "Create model buffer failed.";
|
||||
delete om_model_buff;
|
||||
return nullptr;
|
||||
}
|
||||
if (!ir_build.BuildIRModel(model, *om_model_buff)) {
|
||||
MS_LOG(ERROR) << "Build IR model failed.";
|
||||
ir_build.ReleaseModelBuff(*om_model_buff);
|
||||
delete om_model_buff;
|
||||
return nullptr;
|
||||
}
|
||||
return om_model_buff;
|
||||
}
|
||||
|
||||
int SubGraphNpuKernel::Run() { return this->executor_->Run(in_tensors_, out_tensors_, nodes_, nullptr); }
|
||||
|
||||
int SubGraphNpuKernel::BuildNPUInputOp() {
|
||||
int count = 0;
|
||||
subgraph_input_op_.clear();
|
||||
for (auto node : this->nodes_) {
|
||||
std::vector<ge::Operator *> node_input_op;
|
||||
for (auto in_tensor : node->in_tensors()) {
|
||||
if (IsSubGraphInputTensor(in_tensor)) {
|
||||
auto tensor_name = node->name() + "_" + std::to_string(count++);
|
||||
auto data = mindspore::lite::ConverterToNPUData(in_tensor, tensor_name);
|
||||
subgraph_input_op_.push_back(*data);
|
||||
node_input_op.push_back(data);
|
||||
continue;
|
||||
}
|
||||
|
||||
bool is_weight_tensor = true;
|
||||
for (auto in_kernel : node->in_kernels()) {
|
||||
if (IsContain(in_kernel->out_tensors(), in_tensor)) {
|
||||
if (in_kernel->desc().arch == mindspore::kernel::kNPU) {
|
||||
// input come from npu
|
||||
auto npu_op = reinterpret_cast<NPUKernel *>(in_kernel)->GetNPUOp();
|
||||
if (npu_op != nullptr) {
|
||||
npu_op->GetOutputDesc(0).GetName();
|
||||
node_input_op.push_back(npu_op);
|
||||
is_weight_tensor = false;
|
||||
break;
|
||||
} else {
|
||||
MS_LOG(ERROR) << in_kernel->type_str() << "NPU Operator is nullptr.";
|
||||
return RET_ERROR;
|
||||
}
|
||||
} else {
|
||||
MS_LOG(ERROR) << "The input of the intermediate node comes from the CPU";
|
||||
return RET_ERROR;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// weight tensor
|
||||
if (is_weight_tensor) {
|
||||
if (!(node->Type() == schema::PrimitiveType_Conv2D || node->Type() == schema::PrimitiveType_DeConv2D ||
|
||||
node->Type() == schema::PrimitiveType_DepthwiseConv2D ||
|
||||
node->Type() == schema::PrimitiveType_DeDepthwiseConv2D)) {
|
||||
auto name = node->name() + "_" + std::to_string(count++);
|
||||
auto weight_const = new (std::nothrow) hiai::op::Const(node->name() + "_" + std::to_string(count++));
|
||||
if (weight_const == nullptr) {
|
||||
MS_LOG(ERROR) << "new weight const failed.";
|
||||
return RET_ERROR;
|
||||
}
|
||||
auto weight_tensor = mindspore::lite::ConverterToNPUTensor(in_tensor);
|
||||
weight_const->set_attr_value(weight_tensor);
|
||||
node_input_op.push_back(weight_const);
|
||||
}
|
||||
}
|
||||
}
|
||||
// set input to NPU
|
||||
reinterpret_cast<NPUKernel *>(node)->SetNPUInputs(node->in_tensors(), node->out_tensors(), node_input_op);
|
||||
}
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
bool SubGraphNpuKernel::IsSubGraphInputTensor(lite::Tensor *inputs) { return IsContain(this->in_tensors(), inputs); }
|
||||
|
||||
std::vector<ge::Operator> SubGraphNpuKernel::GetNPUNodes(const vector<kernel::LiteKernel *> &nodes) {
|
||||
std::vector<ge::Operator> ops;
|
||||
ops.reserve(nodes.size());
|
||||
for (int i = 0; i < nodes.size(); i++) {
|
||||
ops.push_back(*reinterpret_cast<NPUKernel *>(nodes[i])->GetNPUOp());
|
||||
}
|
||||
return ops;
|
||||
}
|
||||
|
||||
int SubGraphNpuKernel::BuildNPUOutputOp() {
|
||||
subgraph_output_op_ = GetNPUNodes(out_nodes_);
|
||||
if (subgraph_output_op_.empty()) {
|
||||
MS_LOG(ERROR) << "NPU subgraph output op is empty.";
|
||||
return RET_ERROR;
|
||||
}
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
void SubGraphNpuKernel::SetIndex(int index) { this->index_ = index; }
|
||||
|
||||
std::string SubGraphNpuKernel::GetOMModelName() { return this->name_ + std::to_string(index_) + ".om"; }
|
||||
int SubGraphNpuKernel::Init() {
|
||||
model_buffer_data_ = BuildIRModel();
|
||||
if (model_buffer_data_ == nullptr) {
|
||||
MS_LOG(ERROR) << "Build IR model failed.";
|
||||
return RET_ERROR;
|
||||
}
|
||||
|
||||
mindspore::lite::NPUManager::GetInstance()->AddModel(model_buffer_data_->data, model_buffer_data_->length,
|
||||
GetOMModelName(), context_->GetNpuInfo().frequency_);
|
||||
|
||||
executor_ = new (std::nothrow) mindspore::lite::NPUExecutor(GetOMModelName());
|
||||
|
||||
if (executor_ == nullptr) {
|
||||
MS_LOG(ERROR) << "Create NPUExecutor failed.";
|
||||
return RET_ERROR;
|
||||
}
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
int SubGraphNpuKernel::Prepare() {
|
||||
if (executor_->Prepare(nodes_) != RET_OK) {
|
||||
MS_LOG(ERROR) << "NPU executor prepare failed.";
|
||||
return RET_ERROR;
|
||||
}
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
} // namespace mindspore::kernel
|
|
@ -0,0 +1,84 @@
|
|||
/**
|
||||
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef MINDSPORE_LITE_SRC_RUNTIME_AGENT_SUBGRAPH_NPU_KERNEL_H_
|
||||
#define MINDSPORE_LITE_SRC_RUNTIME_AGENT_SUBGRAPH_NPU_KERNEL_H_
|
||||
#include <vector>
|
||||
#include <string>
|
||||
#include "include/hiai_ir_build.h"
|
||||
#include "src/sub_graph_kernel.h"
|
||||
#include "src/runtime/agent/npu/npu_executor.h"
|
||||
#include "include/graph/op/all_ops.h"
|
||||
|
||||
namespace mindspore::kernel {
|
||||
using mindspore::lite::RET_ERROR;
|
||||
using mindspore::lite::RET_OK;
|
||||
class SubGraphNpuKernel : public SubGraphKernel {
|
||||
public:
|
||||
SubGraphNpuKernel(const std::vector<lite::Tensor *> &inputs, const std::vector<lite::Tensor *> &outputs,
|
||||
const std::vector<kernel::LiteKernel *> &inKernels,
|
||||
const std::vector<kernel::LiteKernel *> &outKernels, const std::vector<kernel::LiteKernel *> &nodes,
|
||||
const lite::InnerContext *ctx = nullptr)
|
||||
: SubGraphKernel(inputs, outputs, inKernels, outKernels, nodes, ctx) {
|
||||
subgraph_type_ = kNpuSubGraph;
|
||||
this->name_ = "NpuSubGraph";
|
||||
}
|
||||
|
||||
~SubGraphNpuKernel() override = default;
|
||||
|
||||
int Init() override;
|
||||
|
||||
int Prepare() override;
|
||||
|
||||
int PreProcess() override { return RET_OK; }
|
||||
|
||||
int Run() override;
|
||||
|
||||
int Run(const KernelCallBack &before, const KernelCallBack &after) override { return this->Run(); }
|
||||
|
||||
int PostProcess() override { return RET_OK; }
|
||||
|
||||
int ReSize() override {
|
||||
MS_LOG(ERROR) << "NPU does not support the resize function temporarily.";
|
||||
return RET_ERROR;
|
||||
}
|
||||
|
||||
void SetIndex(int index);
|
||||
|
||||
private:
|
||||
domi::ModelBufferData *BuildIRModel();
|
||||
|
||||
int BuildNPUInputOp();
|
||||
|
||||
int BuildNPUOutputOp();
|
||||
|
||||
std::vector<ge::Operator> GetNPUNodes(const std::vector<kernel::LiteKernel *> &nodes);
|
||||
|
||||
bool IsSubGraphInputTensor(lite::Tensor *inputs);
|
||||
|
||||
std::string GetOMModelName();
|
||||
|
||||
private:
|
||||
int index_;
|
||||
|
||||
domi::ModelBufferData *model_buffer_data_;
|
||||
|
||||
std::vector<ge::Operator> subgraph_input_op_;
|
||||
|
||||
std::vector<ge::Operator> subgraph_output_op_;
|
||||
};
|
||||
} // namespace mindspore::kernel
|
||||
#endif // MINDSPORE_LITE_SRC_RUNTIME_AGENT_SUBGRAPH_NPU_KERNEL_H_
|
|
@ -0,0 +1,50 @@
|
|||
/**
|
||||
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "src/runtime/kernel/npu/add_npu.h"
|
||||
#include "include/graph/op/all_ops.h"
|
||||
#include "src/kernel_registry.h"
|
||||
|
||||
using mindspore::kernel::KERNEL_ARCH::kNPU;
|
||||
using mindspore::lite::KernelRegistrar;
|
||||
using mindspore::schema::PrimitiveType_Add;
|
||||
|
||||
namespace mindspore::kernel {
|
||||
int AddNPUKernel::IsSupport(const std::vector<lite::Tensor *> &inputs, const std::vector<lite::Tensor *> &outputs,
|
||||
OpParameter *opParameter) {
|
||||
if (inputs[0]->shape() != inputs[1]->shape()) {
|
||||
MS_LOG(INFO) << "ddk 500 does not support broadcast."
|
||||
<< " shape 1 is:" << inputs[0]->shape() << " shape 2 is:" << inputs[1]->shape();
|
||||
return RET_ERROR;
|
||||
}
|
||||
return RET_OK;
|
||||
}
|
||||
void AddNPUKernel::SetNPUInputs(const std::vector<lite::Tensor *> &inputs, const std::vector<lite::Tensor *> &outputs,
|
||||
const std::vector<ge::Operator *> &npu_inputs) {
|
||||
op_ = new hiai::op::Add(name_);
|
||||
op_->set_input_x1(*npu_inputs[0]);
|
||||
op_->set_input_x2(*npu_inputs[1]);
|
||||
}
|
||||
ge::Operator *mindspore::kernel::AddNPUKernel::GetNPUOp() { return this->op_; }
|
||||
AddNPUKernel::~AddNPUKernel() {
|
||||
if (op_ != nullptr) {
|
||||
delete op_;
|
||||
op_ = nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
REG_KERNEL(kNPU, kNumberTypeFloat32, PrimitiveType_Add, NPUKernelCreator<AddNPUKernel>)
|
||||
} // namespace mindspore::kernel
|
|
@ -0,0 +1,41 @@
|
|||
/**
|
||||
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_KERNEL_NPU_ADD_H_
|
||||
#define MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_KERNEL_NPU_ADD_H_
|
||||
#include <vector>
|
||||
#include "src/runtime/kernel/npu/npu_kernel.h"
|
||||
#include "include/graph/op/math_defs.h"
|
||||
namespace mindspore::kernel {
|
||||
class AddNPUKernel : public NPUKernel {
|
||||
public:
|
||||
AddNPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
|
||||
const std::vector<lite::Tensor *> &outputs)
|
||||
: NPUKernel(parameter, inputs, outputs) {}
|
||||
~AddNPUKernel() override;
|
||||
|
||||
int IsSupport(const std::vector<lite::Tensor *> &inputs, const std::vector<lite::Tensor *> &outputs,
|
||||
OpParameter *opParameter) override;
|
||||
void SetNPUInputs(const std::vector<lite::Tensor *> &inputs, const std::vector<lite::Tensor *> &outputs,
|
||||
const std::vector<ge::Operator *> &npu_inputs) override;
|
||||
|
||||
ge::Operator *GetNPUOp() override;
|
||||
|
||||
private:
|
||||
hiai::op::Add *op_;
|
||||
};
|
||||
} // namespace mindspore::kernel
|
||||
#endif // MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_KERNEL_NPU_ADD_H_
|
|
@@ -0,0 +1,49 @@
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "src/runtime/kernel/npu/concat_npu.h"
#include "src/kernel_registry.h"

using mindspore::kernel::KERNEL_ARCH::kNPU;
using mindspore::lite::KernelRegistrar;
using mindspore::schema::PrimitiveType_Concat;

namespace mindspore::kernel {
int ConcatNPUKernel::IsSupport(const std::vector<lite::Tensor *> &inputs, const std::vector<lite::Tensor *> &outputs,
                               OpParameter *opParameter) {
  return RET_OK;
}
void ConcatNPUKernel::SetNPUInputs(const std::vector<lite::Tensor *> &inputs,
                                   const std::vector<lite::Tensor *> &outputs,
                                   const std::vector<ge::Operator *> &npu_inputs) {
  op_ = new hiai::op::ConcatD(name_);
  op_->set_attr_concat_dim(concat_parameter_->axis_);
  op_->set_attr_N(npu_inputs.size());
  op_->create_dynamic_input_x(npu_inputs.size());
  for (int i = 0; i < npu_inputs.size(); ++i) {
    op_->set_dynamic_input_x(i + 1, *npu_inputs[i]);
  }
}
ge::Operator *mindspore::kernel::ConcatNPUKernel::GetNPUOp() { return this->op_; }
ConcatNPUKernel::~ConcatNPUKernel() {
  if (op_ != nullptr) {
    delete op_;
    op_ = nullptr;
  }
}

REG_KERNEL(kNPU, kNumberTypeFloat32, PrimitiveType_Concat, NPUKernelCreator<ConcatNPUKernel>)
}  // namespace mindspore::kernel
@@ -0,0 +1,44 @@
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_KERNEL_NPU_CONCAT_H_
#define MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_KERNEL_NPU_CONCAT_H_
#include <vector>
#include "nnacl/concat_parameter.h"
#include "src/runtime/kernel/npu/npu_kernel.h"
#include "include/graph/op/all_ops.h"
namespace mindspore::kernel {
class ConcatNPUKernel : public NPUKernel {
 public:
  ConcatNPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
                  const std::vector<lite::Tensor *> &outputs)
      : NPUKernel(parameter, inputs, outputs) {
    concat_parameter_ = reinterpret_cast<ConcatParameter *>(parameter);
  }
  ~ConcatNPUKernel() override;

  int IsSupport(const std::vector<lite::Tensor *> &inputs, const std::vector<lite::Tensor *> &outputs,
                OpParameter *opParameter) override;
  void SetNPUInputs(const std::vector<lite::Tensor *> &inputs, const std::vector<lite::Tensor *> &outputs,
                    const std::vector<ge::Operator *> &npu_inputs) override;
  ge::Operator *GetNPUOp() override;

 private:
  hiai::op::ConcatD *op_;
  ConcatParameter *concat_parameter_;
};
}  // namespace mindspore::kernel
#endif  // MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_KERNEL_NPU_CONCAT_H_
@@ -0,0 +1,51 @@
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "src/runtime/kernel/npu/div_npu.h"
#include "include/graph/op/all_ops.h"
#include "src/kernel_registry.h"

using mindspore::kernel::KERNEL_ARCH::kNPU;
using mindspore::lite::KernelRegistrar;
using mindspore::schema::PrimitiveType_Div;

namespace mindspore::kernel {
int DivNPUKernel::IsSupport(const std::vector<lite::Tensor *> &inputs, const std::vector<lite::Tensor *> &outputs,
                            OpParameter *opParameter) {
  op_ = new hiai::op::RealDiv(name_);

  if (inputs[0]->shape() != inputs[1]->shape()) {
    MS_LOG(INFO) << "ddk 500 does not support broadcast."
                 << " shape 1 is:" << inputs[0]->shape() << " shape 2 is:" << inputs[1]->shape();
    return RET_ERROR;
  }
  return RET_OK;
}
void DivNPUKernel::SetNPUInputs(const std::vector<lite::Tensor *> &inputs, const std::vector<lite::Tensor *> &outputs,
                                const std::vector<ge::Operator *> &npu_inputs) {
  op_->set_input_x1(*npu_inputs[0]);
  op_->set_input_x2(*npu_inputs[1]);
}
ge::Operator *mindspore::kernel::DivNPUKernel::GetNPUOp() { return this->op_; }
DivNPUKernel::~DivNPUKernel() {
  if (op_ != nullptr) {
    delete op_;
    op_ = nullptr;
  }
}

REG_KERNEL(kNPU, kNumberTypeFloat32, PrimitiveType_Div, NPUKernelCreator<DivNPUKernel>)
}  // namespace mindspore::kernel
@@ -0,0 +1,40 @@
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_KERNEL_NPU_DIV_H_
#define MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_KERNEL_NPU_DIV_H_
#include <vector>
#include "src/runtime/kernel/npu/npu_kernel.h"
#include "include/graph/op/math_defs.h"
namespace mindspore::kernel {
class DivNPUKernel : public NPUKernel {
 public:
  DivNPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
               const std::vector<lite::Tensor *> &outputs)
      : NPUKernel(parameter, inputs, outputs) {}
  ~DivNPUKernel() override;

  int IsSupport(const std::vector<lite::Tensor *> &inputs, const std::vector<lite::Tensor *> &outputs,
                OpParameter *opParameter) override;
  void SetNPUInputs(const std::vector<lite::Tensor *> &inputs, const std::vector<lite::Tensor *> &outputs,
                    const std::vector<ge::Operator *> &npu_inputs) override;
  ge::Operator *GetNPUOp() override;

 private:
  hiai::op::RealDiv *op_;
};
}  // namespace mindspore::kernel
#endif  // MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_KERNEL_NPU_DIV_H_
@@ -0,0 +1,44 @@
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "src/runtime/kernel/npu/floor_npu.h"
#include "src/kernel_registry.h"

using mindspore::kernel::KERNEL_ARCH::kNPU;
using mindspore::lite::KernelRegistrar;
using mindspore::schema::PrimitiveType_Floor;

namespace mindspore::kernel {
int FloorNPUKernel::IsSupport(const std::vector<lite::Tensor *> &inputs, const std::vector<lite::Tensor *> &outputs,
                              OpParameter *opParameter) {
  return RET_OK;
}
void FloorNPUKernel::SetNPUInputs(const std::vector<lite::Tensor *> &inputs, const std::vector<lite::Tensor *> &outputs,
                                  const std::vector<ge::Operator *> &npu_inputs) {
  op_ = new hiai::op::Floor(name_);

  op_->set_input_x(*npu_inputs[0]);
}
ge::Operator *mindspore::kernel::FloorNPUKernel::GetNPUOp() { return this->op_; }
FloorNPUKernel::~FloorNPUKernel() {
  if (op_ != nullptr) {
    delete op_;
    op_ = nullptr;
  }
}

REG_KERNEL(kNPU, kNumberTypeFloat32, PrimitiveType_Floor, NPUKernelCreator<FloorNPUKernel>)
}  // namespace mindspore::kernel
@@ -0,0 +1,40 @@
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_KERNEL_NPU_FLOOR_H_
#define MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_KERNEL_NPU_FLOOR_H_
#include <vector>
#include "src/runtime/kernel/npu/npu_kernel.h"
#include "include/graph/op/math_defs.h"
namespace mindspore::kernel {
class FloorNPUKernel : public NPUKernel {
 public:
  FloorNPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
                 const std::vector<lite::Tensor *> &outputs)
      : NPUKernel(parameter, inputs, outputs) {}
  ~FloorNPUKernel() override;

  int IsSupport(const std::vector<lite::Tensor *> &inputs, const std::vector<lite::Tensor *> &outputs,
                OpParameter *opParameter) override;
  void SetNPUInputs(const std::vector<lite::Tensor *> &inputs, const std::vector<lite::Tensor *> &outputs,
                    const std::vector<ge::Operator *> &npu_inputs) override;
  ge::Operator *GetNPUOp() override;

 private:
  hiai::op::Floor *op_;
};
}  // namespace mindspore::kernel
#endif  // MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_KERNEL_NPU_FLOOR_H_
@@ -0,0 +1,51 @@
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "src/runtime/kernel/npu/mul_npu.h"
#include "include/graph/op/all_ops.h"
#include "src/kernel_registry.h"

using mindspore::kernel::KERNEL_ARCH::kNPU;
using mindspore::lite::KernelRegistrar;
using mindspore::schema::PrimitiveType_Mul;

namespace mindspore::kernel {
int MulNPUKernel::IsSupport(const std::vector<lite::Tensor *> &inputs, const std::vector<lite::Tensor *> &outputs,
                            OpParameter *opParameter) {
  if (inputs[0]->shape() != inputs[1]->shape()) {
    MS_LOG(INFO) << "ddk 500 does not support broadcast."
                 << " shape 1 is:" << inputs[0]->shape() << " shape 2 is:" << inputs[1]->shape();
    return RET_ERROR;
  }
  return RET_OK;
}
void MulNPUKernel::SetNPUInputs(const std::vector<lite::Tensor *> &inputs, const std::vector<lite::Tensor *> &outputs,
                                const std::vector<ge::Operator *> &npu_inputs) {
  op_ = new hiai::op::Mul(name_);

  op_->set_input_x1(*npu_inputs[0]);
  op_->set_input_x2(*npu_inputs[1]);
}
ge::Operator *mindspore::kernel::MulNPUKernel::GetNPUOp() { return this->op_; }
MulNPUKernel::~MulNPUKernel() {
  if (op_ != nullptr) {
    delete op_;
    op_ = nullptr;
  }
}

REG_KERNEL(kNPU, kNumberTypeFloat32, PrimitiveType_Mul, NPUKernelCreator<MulNPUKernel>)
}  // namespace mindspore::kernel
@@ -0,0 +1,40 @@
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_KERNEL_NPU_MUL_H_
#define MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_KERNEL_NPU_MUL_H_
#include <vector>
#include "src/runtime/kernel/npu/npu_kernel.h"
#include "include/graph/op/math_defs.h"
namespace mindspore::kernel {
class MulNPUKernel : public NPUKernel {
 public:
  MulNPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
               const std::vector<lite::Tensor *> &outputs)
      : NPUKernel(parameter, inputs, outputs) {}
  ~MulNPUKernel() override;

  int IsSupport(const std::vector<lite::Tensor *> &inputs, const std::vector<lite::Tensor *> &outputs,
                OpParameter *opParameter) override;
  void SetNPUInputs(const std::vector<lite::Tensor *> &inputs, const std::vector<lite::Tensor *> &outputs,
                    const std::vector<ge::Operator *> &npu_inputs) override;
  ge::Operator *GetNPUOp() override;

 private:
  hiai::op::Mul *op_;
};
}  // namespace mindspore::kernel
#endif  // MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_KERNEL_NPU_MUL_H_
@@ -0,0 +1,69 @@
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_KERNEL_NPU_KERNEL_H_
#define MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_KERNEL_NPU_KERNEL_H_

#include <vector>
#include "src/lite_kernel.h"
#include "include/errorcode.h"
#include "include/graph/graph.h"
#include "src/kernel_registry.h"

using mindspore::kernel::LiteKernel;
using mindspore::lite::RET_ERROR;
using mindspore::lite::RET_OK;
namespace mindspore::kernel {
class NPUKernel : public LiteKernel {
 public:
  NPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
            const std::vector<lite::Tensor *> &outputs)
      : LiteKernel(parameter, inputs, outputs, nullptr, nullptr) {}
  ~NPUKernel() override = default;

  int Run() override { return RET_ERROR; }

  virtual int IsSupport(const std::vector<lite::Tensor *> &inputs, const std::vector<lite::Tensor *> &outputs,
                        OpParameter *opParameter) {
    return RET_OK;
  }

  virtual ge::Operator *GetNPUOp() = 0;

  virtual void SetNPUInputs(const std::vector<mindspore::lite::Tensor *> &inputs,
                            const std::vector<lite::Tensor *> &outputs,
                            const std::vector<ge::Operator *> &npu_inputs) = 0;
};
template <class T>
kernel::LiteKernel *NPUKernelCreator(const std::vector<lite::Tensor *> &inputs,
                                     const std::vector<lite::Tensor *> &outputs, OpParameter *opParameter,
                                     const lite::InnerContext *ctx, const kernel::KernelKey &desc,
                                     const mindspore::lite::PrimitiveC *primitive) {
  auto *kernel = new (std::nothrow) T(opParameter, inputs, outputs);
  if (kernel == nullptr) {
    MS_LOG(ERROR) << "kernel " << opParameter->name_ << " is nullptr.";
    free(opParameter);
    return nullptr;
  }

  auto ret = kernel->IsSupport(inputs, outputs, opParameter);
  if (ret != RET_OK) {
    // Release the kernel when the op is not supported on the NPU, so the scheduler can fall back to CPU without leaking.
    delete kernel;
    return nullptr;
  }
  return kernel;
}
}  // namespace mindspore::kernel
#endif  // MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_KERNEL_NPU_KERNEL_H_
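Reviewer note: every operator kernel in this patch follows the same pattern built on NPUKernel and NPUKernelCreator, so supporting one more op only needs a new subclass plus a single registration line. As a hedged illustration only (the class and primitive type named below are hypothetical placeholders, not something this commit adds):

// Hypothetical example, mirroring the Add/Mul/Div registrations in this patch.
// "SqrtNPUKernel" and PrimitiveType_Sqrt are placeholders for a future op.
REG_KERNEL(kNPU, kNumberTypeFloat32, PrimitiveType_Sqrt, NPUKernelCreator<SqrtNPUKernel>)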
@@ -0,0 +1,47 @@
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "src/runtime/kernel/npu/reshape_npu.h"
#include "src/kernel_registry.h"
#include "include/graph/op/all_ops.h"
#include "src/runtime/agent/npu/npu_converter_utils.h"
using mindspore::kernel::KERNEL_ARCH::kNPU;
using mindspore::lite::KernelRegistrar;
using mindspore::schema::PrimitiveType_Reshape;

namespace mindspore::kernel {
int ReshapeNPUKernel::IsSupport(const std::vector<lite::Tensor *> &inputs, const std::vector<lite::Tensor *> &outputs,
                                OpParameter *opParameter) {
  return RET_OK;
}
void ReshapeNPUKernel::SetNPUInputs(const std::vector<lite::Tensor *> &inputs,
                                    const std::vector<lite::Tensor *> &outputs,
                                    const std::vector<ge::Operator *> &npu_inputs) {
  op_ = new hiai::op::Reshape(name_);

  op_->set_input_x(*npu_inputs[0]);
  op_->set_input_shape(*npu_inputs[1]);
}
ge::Operator *mindspore::kernel::ReshapeNPUKernel::GetNPUOp() { return this->op_; }
ReshapeNPUKernel::~ReshapeNPUKernel() {
  if (op_ != nullptr) {
    delete op_;
    op_ = nullptr;
  }
}

REG_KERNEL(kNPU, kNumberTypeFloat32, PrimitiveType_Reshape, NPUKernelCreator<ReshapeNPUKernel>)
}  // namespace mindspore::kernel
@@ -0,0 +1,41 @@
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_KERNEL_NPU_RESHAPE_H_
#define MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_KERNEL_NPU_RESHAPE_H_
#include <vector>
#include "nnacl/conv_parameter.h"
#include "src/runtime/kernel/npu/npu_kernel.h"
#include "include/graph/op/all_ops.h"
namespace mindspore::kernel {
class ReshapeNPUKernel : public NPUKernel {
 public:
  ReshapeNPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
                   const std::vector<lite::Tensor *> &outputs)
      : NPUKernel(parameter, inputs, outputs) {}
  ~ReshapeNPUKernel() override;

  int IsSupport(const std::vector<lite::Tensor *> &inputs, const std::vector<lite::Tensor *> &outputs,
                OpParameter *opParameter) override;
  void SetNPUInputs(const std::vector<lite::Tensor *> &inputs, const std::vector<lite::Tensor *> &outputs,
                    const std::vector<ge::Operator *> &npu_inputs) override;
  ge::Operator *GetNPUOp() override;

 private:
  hiai::op::Reshape *op_;
};
}  // namespace mindspore::kernel
#endif  // MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_KERNEL_NPU_RESHAPE_H_
@@ -0,0 +1,46 @@
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "src/runtime/kernel/npu/scale_npu.h"
#include "src/kernel_registry.h"

using mindspore::kernel::KERNEL_ARCH::kNPU;
using mindspore::lite::KernelRegistrar;
using mindspore::schema::PrimitiveType_Scale;

namespace mindspore::kernel {
int ScaleNPUKernel::IsSupport(const std::vector<lite::Tensor *> &inputs, const std::vector<lite::Tensor *> &outputs,
                              OpParameter *opParameter) {
  return RET_OK;
}
void ScaleNPUKernel::SetNPUInputs(const std::vector<lite::Tensor *> &inputs, const std::vector<lite::Tensor *> &outputs,
                                  const std::vector<ge::Operator *> &npu_inputs) {
  op_ = new hiai::op::Scale(name_);
  op_->set_attr_axis(this->axis_);
  op_->set_input_x(*npu_inputs[0]);
  op_->set_input_scale(*npu_inputs[1]);
  op_->set_input_bias(*npu_inputs[2]);
}
ge::Operator *mindspore::kernel::ScaleNPUKernel::GetNPUOp() { return this->op_; }
ScaleNPUKernel::~ScaleNPUKernel() {
  if (op_ != nullptr) {
    delete op_;
    op_ = nullptr;
  }
}

REG_KERNEL(kNPU, kNumberTypeFloat32, PrimitiveType_Scale, NPUKernelCreator<ScaleNPUKernel>)
}  // namespace mindspore::kernel
@@ -0,0 +1,44 @@
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_KERNEL_NPU_SCALE_H_
#define MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_KERNEL_NPU_SCALE_H_
#include <vector>
#include "nnacl/scale.h"
#include "src/runtime/kernel/npu/npu_kernel.h"
#include "include/graph/op/nn_defs.h"
namespace mindspore::kernel {
class ScaleNPUKernel : public NPUKernel {
 public:
  ScaleNPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
                 const std::vector<lite::Tensor *> &outputs)
      : NPUKernel(parameter, inputs, outputs) {
    axis_ = reinterpret_cast<ScaleParameter *>(parameter)->axis_;
  }
  ~ScaleNPUKernel() override;

  int IsSupport(const std::vector<lite::Tensor *> &inputs, const std::vector<lite::Tensor *> &outputs,
                OpParameter *opParameter) override;
  void SetNPUInputs(const std::vector<lite::Tensor *> &inputs, const std::vector<lite::Tensor *> &outputs,
                    const std::vector<ge::Operator *> &npu_inputs) override;
  ge::Operator *GetNPUOp() override;

 private:
  hiai::op::Scale *op_;
  int axis_;
};
}  // namespace mindspore::kernel
#endif  // MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_KERNEL_NPU_SCALE_H_
@@ -0,0 +1,50 @@
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "src/runtime/kernel/npu/softmax_npu.h"
#include "src/kernel_registry.h"

using mindspore::kernel::KERNEL_ARCH::kNPU;
using mindspore::lite::KernelRegistrar;
using mindspore::schema::PrimitiveType_SoftMax;

namespace mindspore::kernel {
int SoftmaxNPUKernel::IsSupport(const std::vector<lite::Tensor *> &inputs, const std::vector<lite::Tensor *> &outputs,
                                OpParameter *opParameter) {
  return RET_OK;
}
void SoftmaxNPUKernel::SetNPUInputs(const std::vector<lite::Tensor *> &inputs,
                                    const std::vector<lite::Tensor *> &outputs,
                                    const std::vector<ge::Operator *> &npu_inputs) {
  op_ = new hiai::op::Softmax(name_);

  if (axis_ == -1) {
    op_->set_attr_axis(inputs[0]->shape().size() - 1);
  } else {
    op_->set_attr_axis(axis_);
  }
  op_->set_input_x(*npu_inputs[0]);
}
ge::Operator *mindspore::kernel::SoftmaxNPUKernel::GetNPUOp() { return this->op_; }
SoftmaxNPUKernel::~SoftmaxNPUKernel() {
  if (op_ != nullptr) {
    delete op_;
    op_ = nullptr;
  }
}

REG_KERNEL(kNPU, kNumberTypeFloat32, PrimitiveType_SoftMax, NPUKernelCreator<SoftmaxNPUKernel>)
}  // namespace mindspore::kernel
@@ -0,0 +1,45 @@
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_KERNEL_NPU_SOFTMAX_H_
#define MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_KERNEL_NPU_SOFTMAX_H_
#include <vector>
#include "src/runtime/kernel/npu/npu_kernel.h"
#include "nnacl/softmax_parameter.h"
#include "include/graph/op/nn_defs.h"
namespace mindspore::kernel {
class SoftmaxNPUKernel : public NPUKernel {
 public:
  SoftmaxNPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
                   const std::vector<lite::Tensor *> &outputs)
      : NPUKernel(parameter, inputs, outputs) {
    auto softmax_parameter = reinterpret_cast<SoftmaxParameter *>(parameter);
    axis_ = softmax_parameter->axis_;
  }
  ~SoftmaxNPUKernel() override;

  int IsSupport(const std::vector<lite::Tensor *> &inputs, const std::vector<lite::Tensor *> &outputs,
                OpParameter *opParameter) override;
  void SetNPUInputs(const std::vector<lite::Tensor *> &inputs, const std::vector<lite::Tensor *> &outputs,
                    const std::vector<ge::Operator *> &npu_inputs) override;
  ge::Operator *GetNPUOp() override;

 private:
  hiai::op::Softmax *op_;
  int axis_;
};
}  // namespace mindspore::kernel
#endif  // MINDSPORE_LITE_SRC_RUNTIME_AGENT_NPU_KERNEL_NPU_SOFTMAX_H_
@@ -28,10 +28,14 @@
#include "src/runtime/kernel/opencl/opencl_subgraph.h"
#include "src/runtime/opencl/opencl_runtime.h"
#endif

#if SUPPORT_NPU
#include "src/runtime/agent/npu/subgraph_npu_kernel.h"
#include "src/runtime/agent/npu/npu_manager.h"
#endif
namespace mindspore::lite {
using kernel::KERNEL_ARCH::kCPU;
using kernel::KERNEL_ARCH::kGPU;
using kernel::KERNEL_ARCH::kNPU;

int Scheduler::Schedule(const lite::Model *model, std::vector<Tensor *> *tensors,
                        std::vector<kernel::LiteKernel *> *kernels) {

@@ -227,13 +231,13 @@ int Scheduler::ConstructSubGraphs(std::vector<kernel::LiteKernel *> *kernels) {
      break;
    }
    auto head_kernel = *head_kernel_iter;
    if (head_kernel->desc().arch == mindspore::kernel::kNPU || head_kernel->desc().arch == mindspore::kernel::kAPU) {
      MS_LOG(ERROR) << "Not support NPU and APU now";
    if (head_kernel->desc().arch == mindspore::kernel::kAPU) {
      MS_LOG(ERROR) << "Not support APU now";
      return RET_NOT_SUPPORT;
    }
    auto cur_sub_graph_type = mindspore::lite::Scheduler::GetKernelSubGraphType(head_kernel);
    auto sub_kernels = FindAllSubGraphKernels(head_kernel, &is_kernel_sinked);
    auto subgraph = CreateSubGraphKernel(sub_kernels, cur_sub_graph_type);
    auto subgraph = CreateSubGraphKernel(sub_kernels, cur_sub_graph_type, kernels->size());
    if (subgraph == nullptr) {
      MS_LOG(ERROR) << "Create SubGraphKernel failed";
      return RET_ERROR;

@@ -244,8 +248,8 @@ int Scheduler::ConstructSubGraphs(std::vector<kernel::LiteKernel *> *kernels) {
}

kernel::SubGraphKernel *Scheduler::CreateSubGraphKernel(const std::vector<kernel::LiteKernel *> &kernels,
                                                        kernel::SubGraphType type) {
  if (type == kernel::kApuSubGraph || type == kernel::kNpuSubGraph) {
                                                        kernel::SubGraphType type, int index) {
  if (type == kernel::kApuSubGraph) {
    return nullptr;
  }
  std::vector<Tensor *> input_tensors = kernel::LiteKernelUtil::SubgraphInputTensors(kernels);

@@ -259,6 +263,17 @@ kernel::SubGraphKernel *Scheduler::CreateSubGraphKernel(const std::vector<kernel
    return sub_kernel;
#else
    return nullptr;
#endif
  }
  if (type == kernel::kNpuSubGraph) {
#if SUPPORT_NPU
    auto sub_kernel =
      new kernel::SubGraphNpuKernel(input_tensors, output_tensors, input_kernels, output_kernels, kernels, context_);
    sub_kernel->SetIndex(index);
    sub_kernel->Init();
    return sub_kernel;
#else
    return nullptr;
#endif
  }
  if (type == kernel::kCpuFP16SubGraph) {

@@ -280,6 +295,19 @@ kernel::LiteKernel *Scheduler::ScheduleNode(const std::vector<Tensor *> &in_tens
  MS_ASSERT(primitive != nullptr);
  TypeId data_type = GetFirstFp32Fp16OrInt8Type(in_tensors);
  kernel::KernelKey desc{kCPU, data_type, static_cast<schema::PrimitiveType>(primitive->Type())};
#if SUPPORT_NPU
  if (context_->IsNpuEnabled()) {
    kernel::KernelKey npu_desc{kNPU, desc.data_type, desc.type};
    auto *kernel = KernelRegistry::GetInstance()->GetKernel(in_tensors, out_tensors, primitive, context_, npu_desc);
    if (kernel != nullptr) {
      MS_LOG(DEBUG) << "Get npu op success: " << schema::EnumNamePrimitiveType(npu_desc.type) << " " << node->name_;
      return kernel;
    } else {
      MS_LOG(DEBUG) << "Get npu op failed, schedule to cpu: " << schema::EnumNamePrimitiveType(npu_desc.type) << " "
                    << node->name_;
    }
  }
#endif
#if SUPPORT_GPU
  if (context_->IsGpuEnabled()) {
    kernel::KernelKey gpu_desc{kGPU, desc.data_type, desc.type};
@@ -46,7 +46,7 @@ class Scheduler {
  int ConstructSubGraphs(std::vector<kernel::LiteKernel *> *kernels);

  kernel::SubGraphKernel *CreateSubGraphKernel(const std::vector<kernel::LiteKernel *> &kernels,
                                               kernel::SubGraphType type);
                                               kernel::SubGraphType type, int index);

  std::vector<kernel::LiteKernel *> FindAllSubGraphKernels(
    kernel::LiteKernel *head_kernel, std::map<const kernel::LiteKernel *, bool> *sinked_kernel_map);
@@ -41,7 +41,6 @@ file(GLOB KERNEL_OP_TRAIN_SRC
if (SUPPORT_TRAIN)
    list(APPEND KERNEL_OP_SRC ${KERNEL_OP_TRAIN_SRC})
endif()

if (PLATFORM_ARM64)
    # assembly
    file(GLOB TEST_ASSEMBLY_SRC ${LITE_DIR}/nnacl/assembly/arm64/*.s

@@ -304,6 +303,10 @@ endif()
#minddata-lite
# )
#endif()
if (SUPPORT_NPU)
    include_directories(${DDK_PATH})
    target_link_libraries(lite-test npu_kernel_mid)
endif ()
if (ENABLE_CONVERTER)
    target_link_libraries(lite-test
            anf_importer_mid
@@ -497,6 +497,12 @@ int Benchmark::RunBenchmark() {
    context->device_list_.push_back(gpu_device_ctx);
  }

  if (flags_->device_ == "NPU") {
    DeviceContext npu_device_ctx{DT_NPU};
    npu_device_ctx.device_info_.npu_device_info_.frequency_ = 3;
    context->device_list_.push_back(npu_device_ctx);
  }

  context->thread_num_ = flags_->num_threads_;

  session_ = session::LiteSession::CreateSession(context.get());

@@ -702,7 +708,7 @@ int Benchmark::Init() {
    return RET_ERROR;
  }

  if (flags_->device_ != "CPU" && flags_->device_ != "GPU") {
  if (flags_->device_ != "CPU" && flags_->device_ != "GPU" && flags_->device_ != "NPU") {
    MS_LOG(ERROR) << "Device type:" << flags_->device_ << " is not supported.";
    std::cerr << "Device type:" << flags_->device_ << " is not supported." << std::endl;
    return RET_ERROR;
@@ -59,7 +59,7 @@ class MS_API BenchmarkFlags : public virtual FlagParser {
    // common
    AddFlag(&BenchmarkFlags::model_file_, "modelFile", "Input model file", "");
    AddFlag(&BenchmarkFlags::in_data_file_, "inDataFile", "Input data file, if not set, use random input", "");
    AddFlag(&BenchmarkFlags::device_, "device", "CPU | GPU", "CPU");
    AddFlag(&BenchmarkFlags::device_, "device", "CPU | GPU | NPU", "CPU");
    AddFlag(&BenchmarkFlags::cpu_bind_mode_, "cpuBindMode",
            "Input 0 for NO_BIND, 1 for HIGHER_CPU, 2 for MID_CPU, default value: 1", 1);
    // MarkPerformance
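With the flag help text updated, the new backend can be requested from the benchmark tool as --device=NPU. For application code, a minimal sketch of enabling the NPU device follows; it simply mirrors the Benchmark::RunBenchmark changes above, and the include paths, helper name, and frequency value are assumptions taken from this patch rather than a documented recipe:

#include <memory>
#include "include/context.h"
#include "include/lite_session.h"

// Hedged sketch (not part of this commit): build a session with an NPU device
// added to the context, the same way the benchmark does above.
mindspore::session::LiteSession *CreateNpuSession() {
  auto context = std::make_shared<mindspore::lite::Context>();
  mindspore::lite::DeviceContext npu_device_ctx{mindspore::lite::DT_NPU};
  npu_device_ctx.device_info_.npu_device_info_.frequency_ = 3;  // value copied from the benchmark
  context->device_list_.push_back(npu_device_ctx);
  return mindspore::session::LiteSession::CreateSession(context.get());
}

On the command line this corresponds to something like ./benchmark --modelFile=model.ms --device=NPU, where the model path is a placeholder.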