forked from mindspore-Ecosystem/mindspore
support siteAI: append infer shape
parent c000ffd809, commit a37b1cfe65
@@ -113,7 +113,8 @@ class MS_API LiteSession {
  /// \brief Resize inputs shape.
  ///
  /// \param[in] inputs Define the new inputs shape.
  /// \param[in] inputs Define the inputs of the model.
  /// \param[in] dims Define the inputs new shape.
  ///
  /// \return STATUS as an error code of resize inputs, STATUS is defined in errorcode.h.
  virtual int Resize(const std::vector<tensor::MSTensor *> &inputs, const std::vector<std::vector<int>> &dims) = 0;
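For context, a minimal caller-side sketch of how the new two-argument Resize might be used once a graph is compiled (the header path, helper name, and shape values are illustrative assumptions, not part of this commit):

#include <vector>
#include "include/lite_session.h"  // assumed public header location

// Hypothetical helper: ask the session to resize its first input to a new NHWC shape.
int ResizeFirstInput(mindspore::session::LiteSession *session) {
  std::vector<mindspore::tensor::MSTensor *> inputs = session->GetInputs();
  std::vector<std::vector<int>> new_dims = {{4, 224, 224, 3}};  // one new shape per tensor in `inputs`
  return session->Resize(inputs, new_dims);  // STATUS error code, see errorcode.h
}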
@@ -4,18 +4,22 @@ set(TOP_DIR ${CMAKE_CURRENT_SOURCE_DIR}/../)
include_directories(${TOP_DIR})

file(GLOB_RECURSE C_SRC ${CMAKE_CURRENT_SOURCE_DIR}/*.cc)
file(GLOB KERNEL_SRC
    ${CMAKE_CURRENT_SOURCE_DIR}/../nnacl/*.c
    ${CMAKE_CURRENT_SOURCE_DIR}/../nnacl/fp32/*.c
    ${CMAKE_CURRENT_SOURCE_DIR}/../nnacl/fp32_grad/*.c
    ${CMAKE_CURRENT_SOURCE_DIR}/../nnacl/int8/*.c
    ${CMAKE_CURRENT_SOURCE_DIR}/../nnacl/quantization/*.c
    ${CMAKE_CURRENT_SOURCE_DIR}/../nnacl/arithmetic_common.c
    ${CMAKE_CURRENT_SOURCE_DIR}/../nnacl/fp32/activation.c
    ${CMAKE_CURRENT_SOURCE_DIR}/../nnacl/fp32/arithmetic_self.c
    ${CMAKE_CURRENT_SOURCE_DIR}/../nnacl/fp32/arithmetic.c
    ${CMAKE_CURRENT_SOURCE_DIR}/../nnacl/fp32/matmul.c
    ${CMAKE_CURRENT_SOURCE_DIR}/../nnacl/fp32_grad/activation_grad.c
    ${CMAKE_CURRENT_SOURCE_DIR}/src/kernel/fp32/*.cc
    ${CMAKE_CURRENT_SOURCE_DIR}/src/kernel/fp32_grad/*.cc
    ${CMAKE_CURRENT_SOURCE_DIR}/src/kernel/common/*.cc
)
list(REMOVE_ITEM KERNEL_SRC ${CMAKE_CURRENT_SOURCE_DIR}/../nnacl/opt_op_handler.c)

set(CCSRC
    ${CMAKE_CURRENT_SOURCE_DIR}/src/lite_session.cc
    ${CMAKE_CURRENT_SOURCE_DIR}/src/ms_tensor.cc
    ${TOP_DIR}/src/common/log_adapter.cc
    ${TOP_DIR}/src/runtime/allocator.cc
    ${CMAKE_CURRENT_SOURCE_DIR}/../../core/gvar/logging_level.cc
@@ -23,11 +27,12 @@ set(CCSRC
if (PLATFORM_ARM64)
    # assembly
    file(GLOB ASSEMBLY_SRC ${CMAKE_CURRENT_SOURCE_DIR}/../nnacl/assembly/arm64/*.s
        ${CMAKE_CURRENT_SOURCE_DIR}/../nnacl/assembly/arm64/*.S)
    file(GLOB ASSEMBLY_SRC
        ${CMAKE_CURRENT_SOURCE_DIR}/../nnacl/assembly/arm64/MatmulFp32OptRemain.S
        ${CMAKE_CURRENT_SOURCE_DIR}/../nnacl/assembly/arm64/MatmulFp32Opt.S)
    set_property(SOURCE ${ASSEMBLY_SRC} PROPERTY LANGUAGE C)
    set(KERNEL_SRC ${KERNEL_SRC} ${ASSEMBLY_SRC})
    add_library(mslite_internal SHARED ${C_SRC} ${CCSRC} ${KERNEL_SRC})
    add_library(mslite_internal SHARED ${CCSRC} ${KERNEL_SRC})
    target_link_libraries(mslite_internal log)
endif()
@@ -84,7 +84,7 @@ typedef struct LiteSession {
  /// \param[in] inputs Define the new inputs shape.
  ///
  /// \return STATUS as an error code of resize inputs, STATUS is defined in errorcode.h.
  int Resize(const TensorPtrVector &inputs, Int32VectorVector dims);
  int Resize(const TensorPtrVector &inputs, const Int32VectorVector &dims);
} LiteSession;

#endif // MINDSPORE_LITE_INCLUDE_LITE_SESSION_H
@@ -21,12 +21,13 @@
struct MSTensor;
struct Node;
using TensorPtr = MSTensor *;
using TensorPtrVector = std::vector<MSTensor *>;
using Uint32Vector = std::vector<uint32_t>;
using String = std::string;
using StringVector = std::vector<std::string>;
using ShapeVector = std::vector<int>;
using NodePtrVector = std::vector<struct Node *>;
using Int32Vector = std::vector<int32_t>;
using Int32Vector = std::vector<int>;
using Int32VectorVector = std::vector<Int32Vector>;
#endif // MINDSPORE_LITE_INCLUDE_LITE_UTILS_H_
@@ -182,6 +182,7 @@ enum KernelType {
  NegGrad,
  LogGrad,
  BatchToSpaceND,
  END,
};

enum ActivationType {
@@ -0,0 +1,31 @@
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "internal/src/kernel/common/common_infershape.h"
#include "internal/include/errorcode.h"
#include "internal/include/ms_tensor.h"
#include "utils/log_adapter.h"

int DoCommonInferShape(const TensorPtrVector &in_tensors, const TensorPtrVector &out_tensors) {
  TensorPtr input = in_tensors.at(0);
  MS_ASSERT(input != nullptr);
  TensorPtr output = out_tensors.at(0);
  MS_ASSERT(output != nullptr);
  output->format_ = input->format_;
  output->data_type_ = input->data_type_;
  output->shape_ = input->shape_;
  return RET_OK;
}
@@ -0,0 +1,24 @@
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef MINDSPORE_LITE_INTERNAL_SRC_KERNEL_COMMON_INFERSHAPE_H_
#define MINDSPORE_LITE_INTERNAL_SRC_KERNEL_COMMON_INFERSHAPE_H_

#include "internal/include/model.h"

int DoCommonInferShape(const TensorPtrVector &in_tensors, const TensorPtrVector &out_tensors);

#endif // MINDSPORE_LITE_INTERNAL_SRC_KERNEL_COMMON_INFERSHAPE_H_
@@ -15,13 +15,18 @@
 */

#include "internal/src/kernel/fp32/activation.h"
#include "internal/src/kernel/common/common_infershape.h"
#include "internal/include/errorcode.h"
#include "internal/include/ms_tensor.h"
#include "nnacl/fp32/activation.h"
#include "utils/log_adapter.h"
#include "nnacl/errorcode.h"

int DoActivation(TensorPtrVector in_tensors, TensorPtrVector out_tensors, Node *node,
int DoActivationInferShape(const TensorPtrVector &in_tensors, const TensorPtrVector &out_tensors, OpParameter *param) {
  return DoCommonInferShape(in_tensors, out_tensors);
}

int DoActivation(const TensorPtrVector &in_tensors, const TensorPtrVector &out_tensors, Node *node,
                 mindspore::lite::Allocator *allocator) {
  ActivationParameter *param = (ActivationParameter *)node->primitive_;
  int ret = RET_OK;
@@ -20,7 +20,8 @@
#include "internal/include/model.h"
#include "src/runtime/allocator.h"

int DoActivation(TensorPtrVector in_tensors, TensorPtrVector out_tensors, Node *node,
int DoActivationInferShape(const TensorPtrVector &in_tensors, const TensorPtrVector &out_tensors, OpParameter *param);
int DoActivation(const TensorPtrVector &in_tensors, const TensorPtrVector &out_tensors, Node *node,
                 mindspore::lite::Allocator *allocator);

#endif // MINDSPORE_LITE_INTERNAL_SRC_KERNEL_FP32_ACTIVATION_H_
@@ -15,12 +15,18 @@
 */

#include "internal/src/kernel/fp32/arithmetic_self.h"
#include "internal/src/kernel/common/common_infershape.h"
#include "internal/include/errorcode.h"
#include "internal/include/ms_tensor.h"
#include "utils/log_adapter.h"
#include "nnacl/fp32/arithmetic_self.h"

int DoArithmeticSelf(TensorPtrVector in_tensors, TensorPtrVector out_tensors, Node *node,
int DoArithmeticSelfInferShape(const TensorPtrVector &in_tensors, const TensorPtrVector &out_tensors,
                               OpParameter *param) {
  return DoCommonInferShape(in_tensors, out_tensors);
}

int DoArithmeticSelf(const TensorPtrVector &in_tensors, const TensorPtrVector &out_tensors, Node *node,
                     mindspore::lite::Allocator *allocator) {
  size_t data_size = in_tensors[0]->ElementsNum();
  OpParameter *param = node->primitive_;
@@ -20,7 +20,9 @@
#include "internal/include/model.h"
#include "src/runtime/allocator.h"

int DoArithmeticSelf(TensorPtrVector in_tensors, TensorPtrVector out_tensors, Node *node,
int DoArithmeticSelfInferShape(const TensorPtrVector &in_tensors, const TensorPtrVector &out_tensors,
                               OpParameter *param);
int DoArithmeticSelf(const TensorPtrVector &in_tensors, const TensorPtrVector &out_tensors, Node *node,
                     mindspore::lite::Allocator *allocator);

#endif // MINDSPORE_LITE_INTERNAL_SRC_KERNEL_FP32_ARITHMETIC_SELF_H_
@@ -71,7 +71,50 @@ void FreeMatMulKernelData(MatMulCPUKernelData *kernel_data, mindspore::lite::All
  free(kernel_data);
}

int DoMatMul(TensorPtrVector in_tensors, TensorPtrVector out_tensors, Node *node,
static void SwapDims(Int32Vector *dims, int index1, int index2) {
  int tmp = dims->at(index1);
  dims->at(index1) = dims->at(index2);
  dims->at(index2) = tmp;
}

int DoMatMulInferShape(const TensorPtrVector &in_tensors, const TensorPtrVector &out_tensors, OpParameter *param) {
  MS_ASSERT(param != nullptr);
  TensorPtr input0 = in_tensors.at(0);
  MS_ASSERT(input0 != nullptr);
  TensorPtr input1 = in_tensors.at(1);
  MS_ASSERT(input1 != nullptr);
  TensorPtr output = out_tensors.at(0);
  MS_ASSERT(output != nullptr);

  output->data_type_ = input0->data_type_;
  output->format_ = input0->format_;

  Int32Vector a_shape = input0->shape_;
  Int32Vector b_shape = input1->shape_;
  if (a_shape.size() < 2 || b_shape.size() < 2) {
    MS_LOG(ERROR) << "inputs shape is invalid";
    return RET_INPUT_TENSOR_ERROR;
  }
  for (size_t i = 0; i < a_shape.size() - 2; ++i) {
    if (a_shape[i] != b_shape[i]) {
      MS_LOG(ERROR) << "Op MatMul's dimensions must be equal";
      return RET_INPUT_TENSOR_ERROR;
    }
  }

  MatMulParameter *matmul_param = (MatMulParameter *)param;
  if (matmul_param->a_transpose_) {
    SwapDims(&a_shape, a_shape.size() - 1, a_shape.size() - 2);
  }
  if (matmul_param->b_transpose_) {
    SwapDims(&b_shape, b_shape.size() - 1, b_shape.size() - 2);
  }
  output->shape_ = a_shape;
  output->shape_.at(a_shape.size() - 1) = b_shape.at(b_shape.size() - 1);
  return RET_OK;
}

int DoMatMul(const TensorPtrVector &in_tensors, const TensorPtrVector &out_tensors, Node *node,
             mindspore::lite::Allocator *allocator) {
  if (in_tensors[0]->data_ == NULL || in_tensors[1]->data_ == NULL) {
    MS_LOG(ERROR) << "input data is NULL!";
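As a quick check of the shape rule above (illustrative code, not part of this commit): the output shape is A's shape with its last dimension replaced by B's last dimension, after any requested transposes.

#include <cassert>
#include <utility>
#include <vector>

// Standalone restatement of DoMatMulInferShape's shape math; MatMulOutShape is a hypothetical helper.
std::vector<int> MatMulOutShape(std::vector<int> a, std::vector<int> b, bool a_transpose, bool b_transpose) {
  if (a_transpose) std::swap(a[a.size() - 1], a[a.size() - 2]);
  if (b_transpose) std::swap(b[b.size() - 1], b[b.size() - 2]);
  std::vector<int> out = a;                // batch dims and rows come from A
  out[out.size() - 1] = b[b.size() - 1];   // columns come from B
  return out;
}

int main() {
  // A {4, 2, 3} x B {4, 3, 5} -> {4, 2, 5}; with b_transpose, B given as {4, 5, 3} yields the same result.
  assert((MatMulOutShape({4, 2, 3}, {4, 3, 5}, false, false) == std::vector<int>{4, 2, 5}));
  assert((MatMulOutShape({4, 2, 3}, {4, 5, 3}, false, true) == std::vector<int>{4, 2, 5}));
  return 0;
}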
@@ -20,7 +20,8 @@
#include "internal/include/model.h"
#include "src/runtime/allocator.h"

int DoMatMul(TensorPtrVector in_tensors, TensorPtrVector out_tensors, Node *node,
int DoMatMulInferShape(const TensorPtrVector &in_tensors, const TensorPtrVector &out_tensors, OpParameter *param);
int DoMatMul(const TensorPtrVector &in_tensors, const TensorPtrVector &out_tensors, Node *node,
             mindspore::lite::Allocator *allocator);

#endif // MINDSPORE_LITE_INTERNAL_SRC_KERNEL_FP32_MATMUL_H_
@@ -15,13 +15,19 @@
 */

#include "internal/src/kernel/fp32_grad/activation_grad.h"
#include "internal/src/kernel/common/common_infershape.h"
#include "internal/include/errorcode.h"
#include "internal/include/ms_tensor.h"
#include "nnacl/fp32_grad/activation_grad.h"
#include "utils/log_adapter.h"
#include "nnacl/errorcode.h"

int DoActivationGrad(TensorPtrVector in_tensors, TensorPtrVector out_tensors, Node *node,
int DoActivationGradInferShape(const TensorPtrVector &in_tensors, const TensorPtrVector &out_tensors,
                               OpParameter *param) {
  return DoCommonInferShape(in_tensors, out_tensors);
}

int DoActivationGrad(const TensorPtrVector &in_tensors, const TensorPtrVector &out_tensors, Node *node,
                     mindspore::lite::Allocator *allocator) {
  ActivationGradParameter *param = (ActivationGradParameter *)node->primitive_;
  int ret = RET_OK;
@@ -20,7 +20,9 @@
#include "internal/include/model.h"
#include "src/runtime/allocator.h"

int DoActivationGrad(TensorPtrVector in_tensors, TensorPtrVector out_tensors, Node *node,
int DoActivationGradInferShape(const TensorPtrVector &in_tensors, const TensorPtrVector &out_tensors,
                               OpParameter *param);
int DoActivationGrad(const TensorPtrVector &in_tensors, const TensorPtrVector &out_tensors, Node *node,
                     mindspore::lite::Allocator *allocator);

#endif // MINDSPORE_LITE_INTERNAL_SRC_KERNEL_FP32_GRAD_ACTIVATION_GRAD_H_
@@ -15,13 +15,19 @@
 */

#include "internal/src/kernel/fp32_grad/arithmetic_self_grad.h"
#include "internal/src/kernel/common/common_infershape.h"
#include "internal/include/errorcode.h"
#include "internal/include/ms_tensor.h"
#include "utils/log_adapter.h"
#include "nnacl/fp32/arithmetic_self.h"
#include "nnacl/fp32/arithmetic.h"

int DoArithmeticGradSelf(TensorPtrVector in_tensors, TensorPtrVector out_tensors, Node *node,
int DoArithmeticSelfGradInferShape(const TensorPtrVector &in_tensors, const TensorPtrVector &out_tensors,
                                   OpParameter *param) {
  return DoCommonInferShape(in_tensors, out_tensors);
}

int DoArithmeticSelfGrad(const TensorPtrVector &in_tensors, const TensorPtrVector &out_tensors, Node *node,
                         mindspore::lite::Allocator *allocator) {
  size_t data_size = in_tensors[0]->ElementsNum();
  OpParameter *param = node->primitive_;
@@ -20,7 +20,9 @@
#include "internal/include/model.h"
#include "src/runtime/allocator.h"

int DoArithmeticGradSelf(TensorPtrVector in_tensors, TensorPtrVector out_tensors, Node *node,
int DoArithmeticSelfGradInferShape(const TensorPtrVector &in_tensors, const TensorPtrVector &out_tensors,
                                   OpParameter *param);
int DoArithmeticSelfGrad(const TensorPtrVector &in_tensors, const TensorPtrVector &out_tensors, Node *node,
                         mindspore::lite::Allocator *allocator);

#endif // MINDSPORE_LITE_INTERNAL_SRC_KERNEL_FP32_GRAD_ARITHMETIC_SELF_GRAD_H_
@@ -25,47 +25,20 @@
#include "internal/src/kernel/fp32_grad/arithmetic_self_grad.h"
#include "internal/src/kernel/fp32_grad/activation_grad.h"

static Context *g_Ctx;
static Model *g_Model;
static LiteSession g_Session;
static mindspore::lite::DefaultAllocator allocator;
static Context *g_ctx;
static Model *g_model;
static LiteSession g_session;
static mindspore::lite::DefaultAllocator g_allocator;
static bool g_infershape_interrupt = false;
static bool g_first_load = true;
typedef int (*InferShape)(const TensorPtrVector &in_tensors, const TensorPtrVector &out_tensors, OpParameter *param);
typedef int (*RunKernel)(const TensorPtrVector &in_tensors, const TensorPtrVector &out_tensors, Node *node,
                         mindspore::lite::Allocator *allocator);
static InferShape g_infershape_funcs[KernelType::END];
static RunKernel g_runkernel_funcs[KernelType::END];

LiteSession *LiteSession::CreateSession(Context *context) {
  g_Ctx = context;
  return &g_Session;
}

int LiteSession::CompileGraph(Model *model) {
  g_Model = model;
  for (auto in : g_Model->input_indices_) {
    g_Model->all_tensors_[in]->data_ = allocator.Malloc(g_Model->all_tensors_[in]->Size());
  }
  return 0;
}

TensorPtrVector LiteSession::GetInputs() const {
  TensorPtrVector in(g_Model->input_indices_.size());
  // for(auto index : g_Model->input_indices_){
  //   in.emplace_back(g_Model->all_tensors_[index]);
  // }
  return in;
}

TensorPtrVector LiteSession::GetInputsByName(const String &node_name) const { return TensorPtrVector(); }

TensorPtrVector LiteSession::GetOutputsByNodeName(const String &node_name) const { return TensorPtrVector(); }

TensorPtrVector LiteSession::GetOutputs() const {
  TensorPtrVector out(g_Model->output_indices_.size());
  // for(auto index : g_Model->output_indices_){
  //   out.emplace_back(g_Model->all_tensors_[index]);
  // }
  return out;
}

int LiteSession::RunGraph() {
  // invoke nnacl kernel
  NodePtrVector nodes = g_Model->nodes_;
static int ModelInferShape() {
  NodePtrVector nodes = g_model->nodes_;
  size_t nodes_size = nodes.size();
  for (size_t i = 0; i < nodes_size; ++i) {
    auto node = nodes[i];
@@ -75,41 +48,139 @@ int LiteSession::RunGraph() {
    }
    TensorPtrVector in_tensors;
    for (size_t j = 0; j < node->input_indices_.size(); ++j) {
      in_tensors.push_back(g_Model->all_tensors_[node->input_indices_[j]]);
      in_tensors.push_back(g_model->all_tensors_[node->input_indices_[j]]);
    }
    TensorPtrVector out_tensors;
    for (size_t j = 0; j < node->output_indices_.size(); ++j) {
      out_tensors.push_back(g_Model->all_tensors_[node->output_indices_[j]]);
      out_tensors.push_back(g_model->all_tensors_[node->output_indices_[j]]);
    }
    int type = node->primitive_->type_;
    int ret = RET_ERROR;
    switch (type) {
      case KernelType::MatMul:
        ret = DoMatMul(in_tensors, out_tensors, node, &allocator);
        break;
      case KernelType::Activation:
        ret = DoActivation(in_tensors, out_tensors, node, &allocator);
        break;
      case KernelType::Log:
      case KernelType::Neg:
        ret = DoArithmeticSelf(in_tensors, out_tensors, node, &allocator);
        break;
      case KernelType::LogGrad:
      case KernelType::NegGrad:
        ret = DoArithmeticGradSelf(in_tensors, out_tensors, node, &allocator);
        break;
      case KernelType::ActivationGrad:
        ret = DoActivationGrad(in_tensors, out_tensors, node, &allocator);
        break;
      default:
    InferShape infershape = g_infershape_funcs[type];
    if (infershape == NULL) {
      MS_LOG(ERROR) << "Unsupported kernel type: " << type;
      return RET_PARAM_INVALID;
    }
    int ret = (*infershape)(in_tensors, out_tensors, node->primitive_);
    if (ret == RET_INFER_INVALID) {
      g_infershape_interrupt = true;
      MS_LOG(INFO) << node->name_ << " infer shape shouldn't be done before runtime, infer shape interrupted!";
    }
    if (ret != RET_OK) {
      MS_LOG(ERROR) << "Infer shape fail! ret: " << ret;
      return ret;
    }
  }
  return RET_OK;
}

static void InitFuncs() {
  if (g_first_load) {
    g_infershape_funcs[KernelType::MatMul] = DoMatMulInferShape;
    g_infershape_funcs[KernelType::Activation] = DoActivationInferShape;
    g_infershape_funcs[KernelType::Log] = DoArithmeticSelfInferShape;
    g_infershape_funcs[KernelType::Neg] = DoArithmeticSelfInferShape;
    g_infershape_funcs[KernelType::ActivationGrad] = DoActivationGradInferShape;

    g_runkernel_funcs[KernelType::MatMul] = DoMatMul;
    g_runkernel_funcs[KernelType::Activation] = DoActivation;
    g_runkernel_funcs[KernelType::Log] = DoArithmeticSelf;
    g_runkernel_funcs[KernelType::LogGrad] = DoArithmeticSelfGrad;
    g_runkernel_funcs[KernelType::Neg] = DoArithmeticSelf;
    g_runkernel_funcs[KernelType::NegGrad] = DoArithmeticSelfGrad;
    g_runkernel_funcs[KernelType::ActivationGrad] = DoActivationGrad;
    g_first_load = false;
  }
}

LiteSession *LiteSession::CreateSession(Context *context) {
  g_ctx = context;
  return &g_session;
}

int LiteSession::CompileGraph(Model *model) {
  InitFuncs();
  g_model = model;
  for (auto in : g_model->input_indices_) {
    g_model->all_tensors_[in]->data_ = g_allocator.Malloc(g_model->all_tensors_[in]->Size());
  }
  g_infershape_interrupt = false;
  int ret = ModelInferShape();
  if (ret != RET_OK && ret != RET_INFER_INVALID) {
    return ret;
  }
  return RET_OK;
}

TensorPtrVector LiteSession::GetInputs() const {
  TensorPtrVector in(g_model->input_indices_.size());
  for (size_t i = 0; i < g_model->input_indices_.size(); ++i) {
    in.at(i) = g_model->all_tensors_[g_model->input_indices_[i]];
  }
  return in;
}

TensorPtrVector LiteSession::GetInputsByName(const String &node_name) const { return TensorPtrVector(); }

TensorPtrVector LiteSession::GetOutputsByNodeName(const String &node_name) const { return TensorPtrVector(); }

TensorPtrVector LiteSession::GetOutputs() const {
  TensorPtrVector out(g_model->output_indices_.size());
  for (size_t i = 0; i < g_model->output_indices_.size(); ++i) {
    out.at(i) = g_model->all_tensors_[g_model->output_indices_[i]];
  }
  return out;
}

int LiteSession::RunGraph() {
  NodePtrVector nodes = g_model->nodes_;
  size_t nodes_size = nodes.size();
  for (size_t i = 0; i < nodes_size; ++i) {
    auto node = nodes[i];
    if (node->primitive_ == nullptr) {
      MS_LOG(ERROR) << "node's primitive is NULL!";
      return RET_ERROR;
    }
    TensorPtrVector in_tensors;
    for (size_t j = 0; j < node->input_indices_.size(); ++j) {
      in_tensors.push_back(g_model->all_tensors_[node->input_indices_[j]]);
    }
    TensorPtrVector out_tensors;
    for (size_t j = 0; j < node->output_indices_.size(); ++j) {
      out_tensors.push_back(g_model->all_tensors_[node->output_indices_[j]]);
    }
    int type = node->primitive_->type_;
    if (g_infershape_interrupt) {
      InferShape infershape = g_infershape_funcs[type];
      if (infershape == NULL) {
        MS_LOG(ERROR) << "Unsupported kernel type: " << type;
        return RET_PARAM_INVALID;
      }
      int ret = (*infershape)(in_tensors, out_tensors, node->primitive_);
      if (ret != RET_OK) {
        MS_LOG(ERROR) << "InferShape fail! ret: " << ret;
        return ret;
      }
    }
    for (size_t j = 0; j < out_tensors.size(); ++j) {
      out_tensors[j]->data_ = g_allocator.Malloc(out_tensors[j]->Size());
      if (out_tensors[j]->data_ == NULL) {
        MS_LOG(ERROR) << "Malloc data for out tensor fail!";
        return RET_NULL_PTR;
      }
    }
    RunKernel run_kernel = g_runkernel_funcs[type];
    if (run_kernel == NULL) {
      MS_LOG(ERROR) << "Unsupported kernel type: " << type;
      return RET_PARAM_INVALID;
    }

    int ret = (*run_kernel)(in_tensors, out_tensors, node, &g_allocator);
    if (ret != RET_OK) {
      MS_LOG(ERROR) << "run kernel fail! ret: " << ret;
      return ret;
    }
  }
  g_infershape_interrupt = false;
  return RET_OK;
}
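The session above replaces the hard-coded switch with two function-pointer tables indexed by KernelType: InitFuncs() registers one InferShape and one RunKernel callback per operator, and RunGraph() looks both up by node type. A self-contained sketch of that pattern (simplified stand-in types and names, not the commit's code):

#include <cstdio>

// Simplified stand-ins for KernelType and the Do* kernel entry points.
enum MiniKernelType { kNeg = 0, kEnd = 1 };
typedef int (*MiniInferShape)(int in_elements, int *out_elements);
typedef int (*MiniRunKernel)(const float *in, float *out, int elements);

static int NegInferShape(int in_elements, int *out_elements) {
  *out_elements = in_elements;  // element-wise op: output shape equals input shape
  return 0;
}
static int NegRun(const float *in, float *out, int elements) {
  for (int i = 0; i < elements; ++i) out[i] = -in[i];
  return 0;
}

static MiniInferShape g_infer[kEnd];
static MiniRunKernel g_run[kEnd];

int main() {
  g_infer[kNeg] = NegInferShape;  // InitFuncs()-style registration
  g_run[kNeg] = NegRun;

  const float in[3] = {1.0f, 2.0f, 3.0f};
  float out[3];
  int out_elements = 0;
  int type = kNeg;  // corresponds to node->primitive_->type_ in the real session
  if (g_infer[type] == nullptr || g_infer[type](3, &out_elements) != 0) return 1;
  if (g_run[type] == nullptr || g_run[type](in, out, out_elements) != 0) return 1;
  std::printf("%.1f %.1f %.1f\n", out[0], out[1], out[2]);
  return 0;
}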
@@ -117,4 +188,4 @@ StringVector LiteSession::GetOutputTensorNames() const { return StringVector();

MSTensor *LiteSession::GetOutputByTensorName(const String &tensor_name) const { return NULL; }

int LiteSession::Resize(const TensorPtrVector &inputs, Int32VectorVector dims) { return 0; }
int LiteSession::Resize(const TensorPtrVector &inputs, const Int32VectorVector &dims) { return 0; }
@@ -17,15 +17,24 @@
#include <vector>
#include <numeric>
#include <string>
#include <functional>
#include "internal/include/ms_tensor.h"

MSTensor *CreateTensor(TypeId data_type, const ShapeVector &shape) {
  MSTensor *tensor = new MSTensor();
  MSTensor *tensor = (MSTensor *)malloc(sizeof(MSTensor));
  if (tensor == NULL) {
    return NULL;
  }
  tensor->shape_ = shape;
  tensor->data_type_ = data_type;
  return tensor;
}

int MSTensor::ElementsNum() const { return std::accumulate(shape_.begin(), shape_.end(), 1LL, std::multiplies<int>()); }

int MSTensor::ElementsNum() const {
  int result = 1;
  for (size_t i = 0; i < shape_.size(); ++i) {
    result *= shape_.at(i);
  }
  return result;
}

size_t MSTensor::Size() const {
  size_t size = 0;
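A quick equivalence check of the rewritten ElementsNum (illustrative only, not from this commit): the hand-rolled loop gives the same product as the std::accumulate one-liner it replaces, for example on the {1, 1, 1, 10} shape used by the new unit test.

#include <cassert>
#include <functional>
#include <numeric>
#include <vector>

int main() {
  std::vector<int> shape = {1, 1, 1, 10};
  int loop_result = 1;  // mirrors the new loop-based ElementsNum
  for (size_t i = 0; i < shape.size(); ++i) loop_result *= shape[i];
  int acc_result = std::accumulate(shape.begin(), shape.end(), 1, std::multiplies<int>());
  assert(loop_result == acc_result && loop_result == 10);
  return 0;
}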
@@ -0,0 +1,73 @@
set(TOP_DIR ${CMAKE_CURRENT_SOURCE_DIR}/../../../../..)
set(TEST_DIR ${TOP_DIR}/mindspore/lite/test)
set(LITE_DIR ${TOP_DIR}/mindspore/lite)

include_directories(${TOP_DIR})
include_directories(${TEST_DIR})

string(REPLACE " -Werror " " " CMAKE_C_FLAGS "${CMAKE_C_FLAGS}")
string(REPLACE " -Werror " " " CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
STRING(REPLACE " -fvisibility=hidden " " -fvisibility=default " CMAKE_C_FLAGS "${CMAKE_C_FLAGS}")
STRING(REPLACE " -fvisibility=hidden " " -fvisibility=default " CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")

### cpu kernel
file(GLOB KERNEL_OP_SRC
    ${LITE_DIR}/internal/src/kernel/*.cc
    ${LITE_DIR}/internal/src/kernel/common/*.cc
    ${LITE_DIR}/internal/src/kernel/fp32/*.cc
    ${LITE_DIR}/internal/src/kernel/fp32_grad/*.cc
    ${LITE_DIR}/nnacl/*.c
    ${LITE_DIR}/nnacl/fp32/*.c
    ${LITE_DIR}/nnacl/fp32_grad/*.c
    ${LITE_DIR}/nnacl/int8/*.c
    ${LITE_DIR}/nnacl/quantization/*.c
)

if (PLATFORM_ARM64)
    # assembly
    file(GLOB TEST_ASSEMBLY_SRC ${LITE_DIR}/nnacl/assembly/arm64/*.s
        ${LITE_DIR}/nnacl/assembly/arm64/*.S)

    set_property(SOURCE ${TEST_ASSEMBLY_SRC} PROPERTY LANGUAGE C)
    set(KERNEL_OP_SRC
        ${KERNEL_OP_SRC}
        ${TEST_ASSEMBLY_SRC}
    )
endif()

### runtime framework
set(TEST_LITE_SRC
    ${LITE_DIR}/internal/src/lite_session.cc
    ${LITE_DIR}/src/runtime/allocator.cc
    ${LITE_DIR}/internal/src/ms_tensor.cc
    ${TOP_DIR}/mindspore/core/utils/log_adapter.cc
    ${TOP_DIR}/mindspore/core/gvar/logging_level.cc
)

### test src
file(GLOB_RECURSE TEST_CASE_KERNEL_SRC
    ${TEST_DIR}/ut/internal/*.cc
)

file(GLOB_RECURSE TEST_CASE_KERNEL_TRAIN_SRC
    ${TEST_DIR}/ut/src/runtime/kernel/arm/fp32_grad/*.cc
)

set(TEST_SRC
    ${TEST_LITE_SRC}
    ${TEST_CASE_KERNEL_SRC}
    ${KERNEL_OP_SRC}
    ${TEST_DIR}/common/common_test.cc
    ${TEST_DIR}/main.cc
)

add_executable(lite-test-internal ${TEST_SRC})

target_link_libraries(lite-test-internal dl ${GTEST_LIBRARY})
if (PLATFORM_ARM64)
    target_link_libraries(lite-test-internal mslite_internal)
endif()

if (PLATFORM_ARM32 OR PLATFORM_ARM64)
    target_link_libraries(lite-test-internal log)
endif()
@@ -22,7 +22,7 @@
#include "internal/include/context.h"
#include "internal/include/errorcode.h"
#include "internal/include/ms_tensor.h"
#include "nnacl/conv_parameter.h"
#include "nnacl/op_base.h"

namespace mindspore {
class InferTest : public mindspore::CommonTest {
@@ -31,33 +31,42 @@ class InferTest : public mindspore::CommonTest {
};

TEST_F(InferTest, TestSession) {
  // Model model;
  // Node *node = (Node *)malloc(sizeof(Node));
  // node->name_ = "conv2d";
  // uint32_t index = model.all_tensors_.size();
  // node->input_indices_ = {index};
  // MSTensor *in = CreateTensor(kNumberTypeFloat32, {3, 3, 24, 24});
  // model.all_tensors_.emplace_back(in);
  //
  // index = model.all_tensors_.size();
  // node->output_indices_ = {index};
  // MSTensor *out = CreateTensor(kNumberTypeFloat32, {3, 3, 24, 24});
  // model.all_tensors_.emplace_back(out);
  //
  // ConvParameter *param = (ConvParameter *)malloc(sizeof(ConvParameter));
  // param->kernel_w_ = 3;
  // // todo: fill other param fields
  // node->primitive_ = (PrimitiveC *)param;
  // model.nodes_.push_back(node);
  //
  // LiteSession session;
  // session.CompileGraph(&model);
  // TensorPtrVector invec = session.GetInputs();
  // ASSERT_EQ(invec.size(), 1);
  // // todo: fill inputs data
  // session.RunGraph();
  // TensorPtrVector outvec = session.GetOutputs();
  // ASSERT_EQ(outvec.size(), 1);
  Model model;
  Node *node = reinterpret_cast<Node *>(malloc(sizeof(Node)));

  node->name_ = "Neg";
  node->node_type_ = NodeType::NodeType_CNode;
  PrimitiveC *prim = reinterpret_cast<PrimitiveC *>(malloc(sizeof(PrimitiveC)));
  prim->type_ = KernelType::Neg;
  node->primitive_ = prim;
  node->input_indices_.push_back(0);
  node->output_indices_.push_back(1);
  model.nodes_.push_back(node);

  MSTensor *in = CreateTensor(kNumberTypeFloat32, {1, 1, 1, 10});
  model.all_tensors_.push_back(in);
  model.input_indices_.push_back(0);

  MSTensor *out = CreateTensor(kNumberTypeFloat32, {1, 1, 1, 10});
  model.all_tensors_.emplace_back(out);
  model.output_indices_.push_back(1);

  LiteSession session;
  session.CompileGraph(&model);
  TensorPtrVector invec = session.GetInputs();
  ASSERT_EQ(invec.size(), 1);
  constexpr int kOutSize = 10;
  float expect_out[kOutSize];
  for (int i = 0; i < kOutSize; ++i) {
    *(reinterpret_cast<float *>(in->data_) + i) = i + 1;
    expect_out[i] = -(i + 1);
  }
  session.RunGraph();
  TensorPtrVector outvec = session.GetOutputs();
  ASSERT_EQ(outvec.size(), 1);
  for (int i = 0; i < kOutSize; ++i) {
    std::cout << *(reinterpret_cast<float *>(outvec.at(0)->data_) + i) << " ";
  }
  std::cout << "\n";
  CompareOutputData(reinterpret_cast<float *>(outvec.at(0)->data_), expect_out, kOutSize, 0.000001);
}

}  // namespace mindspore