!47657 MSLite, add device memory st
Merge pull request !47657 from 徐永飞/master
This commit is contained in:
commit 9555e64b73
@ -0,0 +1,45 @@
cmake_minimum_required(VERSION 3.14)
project(QuickStartCpp)

if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU" AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS 7.3.0)
    message(FATAL_ERROR "GCC version ${CMAKE_CXX_COMPILER_VERSION} must not be less than 7.3.0")
endif()

include_directories(${CMAKE_CURRENT_SOURCE_DIR})

if(DEFINED ENV{LITE_HOME})
    set(LITE_HOME $ENV{LITE_HOME})
endif()

if(DEFINED ENV{EXAMPLE_TARGET})
    set(EXAMPLE_TARGET $ENV{EXAMPLE_TARGET})
endif()

# Add directory to include search path
include_directories(${LITE_HOME}/runtime)
# Add directory to linker search path
link_directories(${LITE_HOME}/runtime/lib)

if("${EXAMPLE_TARGET}" STREQUAL "Ascend")
    include_directories(/usr/local/Ascend/latest/fwkacllib/include)
    link_directories(/usr/local/Ascend/latest/fwkacllib/lib64)
    add_definitions(-DENABLE_ASCEND)
else()
    set(CUDA_HOME $ENV{CUDA_HOME})
    include_directories(${CUDA_HOME}/include)
    link_directories(${CUDA_HOME}/lib64)
    add_definitions(-DENABLE_GPU)
endif()

file(GLOB_RECURSE QUICK_START_CXX ${CMAKE_CURRENT_SOURCE_DIR}/*.cc)
add_executable(mindspore_quick_start_cpp ${QUICK_START_CXX})

target_link_libraries(
    mindspore_quick_start_cpp mindspore-lite pthread dl
)

if("${EXAMPLE_TARGET}" STREQUAL "Ascend")
    target_link_libraries(mindspore_quick_start_cpp ascendcl)
else()
    target_link_libraries(mindspore_quick_start_cpp cudart cublas)
endif()
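
For reference, a minimal sketch of driving the CMake build above by hand for the GPU target; the package and CUDA paths below are placeholders, not values taken from this change:

    export LITE_HOME=/path/to/mindspore-lite-<version>-linux-x64   # extracted MindSpore Lite package (assumed path)
    export CUDA_HOME=/usr/local/cuda                               # CUDA toolkit root (assumed path)
    mkdir -p build && cd build
    cmake .. -DEXAMPLE_TARGET=GPU
    make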
@ -0,0 +1,43 @@
#!/usr/bin/env bash
# Copyright 2022 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================

if [ $# -ne 1 ]; then
    echo "Usage: bash build.sh [DEVICE_TARGET]
DEVICE_TARGET can choose from ['Ascend', 'GPU']."
    exit
fi

device_target=$1

if [ 0"$LITE_HOME" = "0" ]; then
    echo "Please set env LITE_HOME to MindSpore Lite tar path"
    exit
fi

if [ 0"$device_target" != "0GPU" ] && [ 0"$device_target" != "0Ascend" ]; then
    echo "Please set args 1 EXAMPLE_TARGET to Ascend or GPU"
    exit
fi

if [ 0"$device_target" = "0GPU" ] && [ 0"$CUDA_HOME" = "0" ]; then
    echo "Please set env CUDA_HOME to path of cuda, if env EXAMPLE_TARGET is GPU"
    exit
fi

rm -rf build
mkdir build && cd build || exit
cmake ../ -DEXAMPLE_TARGET=$device_target
make
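
As a usage sketch, the wrapper script above needs only the device target as its single argument; for the Ascend case the toolkit location comes from the CMake defaults, and the Lite package path below is an assumed placeholder:

    export LITE_HOME=/path/to/mindspore-lite-<version>-linux-x64   # assumed extraction path of the Lite tar package
    bash build.sh Ascend
    # on success the binary is written to build/mindspore_quick_start_cpp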
@ -0,0 +1,445 @@
/**
 * Copyright 2022 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include <algorithm>
#include <random>
#include <iostream>
#include <fstream>
#include <cstring>
#include <memory>
#include "include/api/model.h"
#include "include/api/context.h"
#include "include/api/status.h"
#include "include/api/types.h"

#ifdef ENABLE_ASCEND
#include "./mem_ascend.h"
#else
#include "./mem_gpu.h"
#endif
bool g_set_data = true;
std::vector<std::vector<uint8_t>> g_cmp_data;

static std::string ShapeToString(const std::vector<int64_t> &shape) {
  std::string result = "[";
  for (size_t i = 0; i < shape.size(); ++i) {
    result += std::to_string(shape[i]);
    if (i + 1 < shape.size()) {
      result += ", ";
    }
  }
  result += "]";
  return result;
}

template <typename T, typename Distribution>
void GenerateRandomData(int size, void *data, Distribution distribution) {
  std::random_device rd{};
  std::mt19937 random_engine{rd()};
  int elements_num = size / sizeof(T);
  (void)std::generate_n(static_cast<T *>(data), elements_num,
                        [&distribution, &random_engine]() { return static_cast<T>(distribution(random_engine)); });
}

int GenerateRandomInputData(std::vector<mindspore::MSTensor> inputs, std::vector<uint8_t *> *host_data_buffer) {
  for (auto tensor : inputs) {
    auto data_size = tensor.DataSize();
    if (data_size == 0) {
      std::cerr << "Data size cannot be 0, tensor shape: " << ShapeToString(tensor.Shape()) << std::endl;
      return -1;
    }
    auto host_data = new uint8_t[data_size];
    host_data_buffer->push_back(host_data);
    GenerateRandomData<float>(data_size, host_data, std::normal_distribution<float>(0.0f, 1.0f));
  }
  return 0;
}

int SetHostData(std::vector<mindspore::MSTensor> tensors, const std::vector<uint8_t *> &host_data_buffer) {
  for (size_t i = 0; i < tensors.size(); i++) {
    tensors[i].SetData(host_data_buffer[i], false);
    tensors[i].SetDeviceData(nullptr);
  }
  return 0;
}

int SetDeviceData(std::vector<mindspore::MSTensor> tensors, const std::vector<uint8_t *> &host_data_buffer,
                  std::vector<void *> *device_buffers) {
  for (size_t i = 0; i < tensors.size(); i++) {
    auto &tensor = tensors[i];
    auto host_data = host_data_buffer[i];
    auto data_size = tensor.DataSize();
    if (data_size == 0) {
      std::cerr << "Data size cannot be 0, tensor shape: " << ShapeToString(tensor.Shape()) << std::endl;
      return -1;
    }
    auto device_data = MallocDeviceMemory(data_size);
    if (device_data == nullptr) {
      std::cerr << "Failed to alloc device data, data size " << data_size << std::endl;
      return -1;
    }
    device_buffers->push_back(device_data);
    if (CopyMemoryHost2Device(device_data, data_size, host_data, data_size) != 0) {
      std::cerr << "Failed to copy data to device, data size " << data_size << std::endl;
      return -1;
    }
    tensor.SetDeviceData(device_data);
    tensor.SetData(nullptr, false);
  }
  return 0;
}

int SetOutputHostData(std::vector<mindspore::MSTensor> tensors, std::vector<uint8_t *> *host_buffers) {
  for (size_t i = 0; i < tensors.size(); i++) {
    auto &tensor = tensors[i];
    auto data_size = tensor.DataSize();
    if (data_size == 0) {
      std::cerr << "Data size cannot be 0, tensor shape: " << ShapeToString(tensor.Shape()) << std::endl;
      return -1;
    }
    auto host_data = new uint8_t[data_size];
    host_buffers->push_back(host_data);
    tensor.SetData(host_data, false);
    tensor.SetDeviceData(nullptr);
  }
  return 0;
}

int SetOutputDeviceData(std::vector<mindspore::MSTensor> tensors, std::vector<void *> *device_buffers) {
  for (size_t i = 0; i < tensors.size(); i++) {
    auto &tensor = tensors[i];
    auto data_size = tensor.DataSize();
    if (data_size == 0) {
      std::cerr << "Data size cannot be 0, tensor shape: " << ShapeToString(tensor.Shape()) << std::endl;
      return -1;
    }
    auto device_data = MallocDeviceMemory(data_size);
    if (device_data == nullptr) {
      std::cerr << "Failed to alloc device data, data size " << data_size << std::endl;
      return -1;
    }
    device_buffers->push_back(device_data);
    tensor.SetDeviceData(device_data);
    tensor.SetData(nullptr, false);
  }
  return 0;
}

template <class T>
void PrintBuffer(const void *buffer, size_t elem_count) {
  auto data = reinterpret_cast<const T *>(buffer);
  constexpr size_t max_print_count = 50;
  for (size_t i = 0; i < elem_count && i <= max_print_count; i++) {
    std::cout << data[i] << " ";
  }
  std::cout << std::endl;
}

bool PrintOutputsTensor(std::vector<mindspore::MSTensor> outputs) {
  if (g_set_data) {
    g_cmp_data.clear();
  } else {
    if (g_cmp_data.size() != outputs.size()) {
      std::cout << "Output size " << outputs.size() << " != output size last time " << g_cmp_data.size() << std::endl;
      return false;
    }
  }
  for (size_t i = 0; i < outputs.size(); i++) {
    auto &tensor = outputs[i];
    auto elem_num = tensor.ElementNum();
    auto data_size = tensor.DataSize();
    std::vector<uint8_t> host_data;
    const void *print_data;
    if (tensor.GetDeviceData() != nullptr) {
      host_data.resize(data_size);
      CopyMemoryDevice2Host(host_data.data(), host_data.size(), tensor.GetDeviceData(), data_size);
      print_data = host_data.data();
      std::cout << "Device data, tensor name is:" << tensor.Name() << " tensor size is:" << data_size
                << " tensor elements num is:" << elem_num << std::endl;
    } else {
      print_data = tensor.Data().get();
      std::cout << "Host data, tensor name is:" << tensor.Name() << " tensor size is:" << data_size
                << " tensor elements num is:" << elem_num << std::endl;
    }
    if (print_data == nullptr) {
      std::cerr << "Invalid output data" << std::endl;
      return false;
    }
    auto data_type = tensor.DataType();
    if (data_type == mindspore::DataType::kNumberTypeFloat32) {
      PrintBuffer<float>(print_data, elem_num);
    } else if (data_type == mindspore::DataType::kNumberTypeFloat64) {
      PrintBuffer<double>(print_data, elem_num);
    } else if (data_type == mindspore::DataType::kNumberTypeInt64) {
      PrintBuffer<int64_t>(print_data, elem_num);
    } else if (data_type == mindspore::DataType::kNumberTypeInt32) {
      PrintBuffer<int32_t>(print_data, elem_num);
    } else if (data_type == mindspore::DataType::kNumberTypeInt16) {
      PrintBuffer<int16_t>(print_data, elem_num);
    } else if (data_type == mindspore::DataType::kNumberTypeInt8) {
      PrintBuffer<int8_t>(print_data, elem_num);
    } else if (data_type == mindspore::DataType::kNumberTypeUInt64) {
      PrintBuffer<uint64_t>(print_data, elem_num);
    } else if (data_type == mindspore::DataType::kNumberTypeUInt32) {
      PrintBuffer<uint32_t>(print_data, elem_num);
    } else if (data_type == mindspore::DataType::kNumberTypeUInt16) {
      PrintBuffer<uint16_t>(print_data, elem_num);
    } else if (data_type == mindspore::DataType::kNumberTypeUInt8) {
      PrintBuffer<uint8_t>(print_data, elem_num);
    } else if (data_type == mindspore::DataType::kNumberTypeBool) {
      PrintBuffer<bool>(print_data, elem_num);
    } else {
      std::cout << "Unsupported data type " << static_cast<int>(tensor.DataType()) << std::endl;
    }
    if (g_set_data) {
      if (host_data.empty()) {
        host_data.resize(data_size);
        memcpy(host_data.data(), print_data, host_data.size());
      }
      g_cmp_data.emplace_back(std::move(host_data));
    } else {
      auto &cmp_data = g_cmp_data[i];
      if (cmp_data.size() != data_size) {
        std::cout << "Output " << i << " data size " << data_size << " != data size last time " << cmp_data.size()
                  << std::endl;
        return false;
      }
      auto host_uint8 = reinterpret_cast<const uint8_t *>(print_data);
      for (size_t k = 0; k < cmp_data.size(); k++) {
        if (cmp_data[k] != host_uint8[k]) {
          std::cout << "Output " << i << " data as uint8_t " << (uint32_t)host_uint8[k] << " != that last time "
                    << (uint32_t)cmp_data[k] << std::endl;
          return false;
        }
      }
    }
  }
  return true;
}

int Predict(mindspore::Model *model, const std::vector<mindspore::MSTensor> &inputs,
            std::vector<mindspore::MSTensor> *outputs) {
  auto ret = model->Predict(inputs, outputs);
  if (ret != mindspore::kSuccess) {
    std::cerr << "Predict error " << ret << std::endl;
    return -1;
  }
  if (!PrintOutputsTensor(*outputs)) {
    return -1;
  }
  return 0;
}

class ResourceGuard {
 public:
  explicit ResourceGuard(std::function<void()> rel_func) : rel_func_(rel_func) {}
  ~ResourceGuard() {
    if (rel_func_) {
      rel_func_();
    }
  }

 private:
  std::function<void()> rel_func_ = nullptr;
};

int TestHostDeviceInput(mindspore::Model *model, uint32_t batch_size) {
  // Get Input
  auto inputs = model->GetInputs();
  std::vector<std::vector<int64_t>> input_shapes;
  std::transform(inputs.begin(), inputs.end(), std::back_inserter(input_shapes), [batch_size](auto &item) {
    auto shape = item.Shape();
    shape[0] = batch_size;
    return shape;
  });
  if (model->Resize(inputs, input_shapes) != mindspore::kSuccess) {
    std::cerr << "Failed to resize model batch size to " << batch_size << std::endl;
    return -1;
  }
  std::cout << "Success resize model batch size to " << batch_size << std::endl;

  // Generate random data as input data.
  std::vector<uint8_t *> host_buffers;
  ResourceGuard host_rel([&host_buffers]() {
    for (auto &item : host_buffers) {
      delete[] item;
    }
  });

  std::vector<void *> device_buffers;
  ResourceGuard device_rel([&device_buffers]() {
    for (auto &item : device_buffers) {
      FreeDeviceMemory(item);
    }
  });

  auto ret = GenerateRandomInputData(inputs, &host_buffers);
  if (ret != 0) {
    std::cerr << "Generate Random Input Data failed." << std::endl;
    return -1;
  }
  // empty outputs
  std::vector<mindspore::MSTensor> outputs;
  // Model Predict, input host memory
  SetHostData(inputs, host_buffers);
  g_set_data = true;
  if (Predict(model, inputs, &outputs) != 0) {
    return -1;
  }
  g_set_data = false;  // compare data next time
  // Model Predict, input device memory
  outputs.clear();
  SetDeviceData(inputs, host_buffers, &device_buffers);
  if (Predict(model, inputs, &outputs) != 0) {
    return -1;
  }
  return 0;
}

int TestHostDeviceOutput(mindspore::Model *model, uint32_t batch_size) {
  // Get Input
  auto inputs = model->GetInputs();
  std::vector<std::vector<int64_t>> input_shapes;
  std::transform(inputs.begin(), inputs.end(), std::back_inserter(input_shapes), [batch_size](auto &item) {
    auto shape = item.Shape();
    shape[0] = batch_size;
    return shape;
  });
  if (model->Resize(inputs, input_shapes) != mindspore::kSuccess) {
    std::cerr << "Failed to resize model batch size to " << batch_size << std::endl;
    return -1;
  }
  std::cout << "Success resize model batch size to " << batch_size << std::endl;

  // Generate random data as input data.
  std::vector<uint8_t *> host_buffers;
  ResourceGuard host_rel([&host_buffers]() {
    for (auto &item : host_buffers) {
      delete[] item;
    }
  });

  std::vector<void *> device_buffers;
  ResourceGuard device_rel([&device_buffers]() {
    for (auto &item : device_buffers) {
      FreeDeviceMemory(item);
    }
  });

  auto ret = GenerateRandomInputData(inputs, &host_buffers);
  if (ret != 0) {
    std::cerr << "Generate Random Input Data failed." << std::endl;
    return -1;
  }
  // Get Output from model
  auto outputs = model->GetOutputs();
  // ---------------------- output host data
  std::vector<uint8_t *> output_host_buffers;
  ResourceGuard output_host_rel([&output_host_buffers]() {
    for (auto &item : output_host_buffers) {
      delete[] item;
    }
  });
  if (SetOutputHostData(outputs, &output_host_buffers) != 0) {
    std::cerr << "Failed to set output host data" << std::endl;
    return -1;
  }
  // Model Predict, input host memory
  SetHostData(inputs, host_buffers);
  g_set_data = true;
  if (Predict(model, inputs, &outputs) != 0) {
    return -1;
  }
  g_set_data = false;  // compare data next time
  // Model Predict, input device memory
  if (SetDeviceData(inputs, host_buffers, &device_buffers) != 0) {
    std::cerr << "Failed to set input device data" << std::endl;
    return -1;
  }
  if (Predict(model, inputs, &outputs) != 0) {
    return -1;
  }
  // ---------------------- output device data
  std::vector<void *> output_device_buffers;
  ResourceGuard output_device_rel([&output_device_buffers]() {
    for (auto &item : output_device_buffers) {
      FreeDeviceMemory(item);
    }
  });
  if (SetOutputDeviceData(outputs, &output_device_buffers) != 0) {
    std::cerr << "Failed to set output device data" << std::endl;
    return -1;
  }
  // Model Predict, input host memory
  SetHostData(inputs, host_buffers);
  if (Predict(model, inputs, &outputs) != 0) {
    return -1;
  }
  // Model Predict, input device memory
  if (SetDeviceData(inputs, host_buffers, &device_buffers) != 0) {
    std::cerr << "Failed to set input device data" << std::endl;
    return -1;
  }
  if (Predict(model, inputs, &outputs) != 0) {
    return -1;
  }
  return 0;
}

int QuickStart(int argc, const char **argv) {
  if (argc < 2) {
    std::cerr << "Model file must be provided.\n";
    return -1;
  }
  // Read model file.
  std::string model_path = argv[1];
  if (model_path.empty()) {
    std::cerr << "Model path " << model_path << " is invalid.";
    return -1;
  }
  // Create and init context, add device info
  auto context = std::make_shared<mindspore::Context>();
  if (context == nullptr) {
    std::cerr << "New context failed." << std::endl;
    return -1;
  }
  auto &device_list = context->MutableDeviceInfo();

#ifdef ENABLE_ASCEND
  auto device_info = std::make_shared<mindspore::AscendDeviceInfo>();
#else
  auto device_info = std::make_shared<mindspore::GPUDeviceInfo>();
#endif
  if (device_info == nullptr) {
    std::cerr << "New device info failed." << std::endl;
    return -1;
  }
  device_info->SetDeviceID(0);
  device_list.push_back(device_info);

  mindspore::Model model;
  // Build model
  auto build_ret = model.Build(model_path, mindspore::kMindIR, context);
  if (build_ret != mindspore::kSuccess) {
    std::cerr << "Build model error " << build_ret << std::endl;
    return -1;
  }
  TestHostDeviceInput(&model, 1);
  TestHostDeviceOutput(&model, 1);
  return 0;
}

int main(int argc, const char **argv) { return QuickStart(argc, argv); }
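
Assuming the example was built with the script above, a sketch of invoking it; the MindIR path is a placeholder:

    ./build/mindspore_quick_start_cpp /path/to/model.mindir
    # Each test helper runs inference twice, once with host-resident buffers and once with
    # device-resident buffers, and PrintOutputsTensor checks that both runs produce identical outputs.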
@ -0,0 +1,57 @@
/**
 * Copyright 2022 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef MINDSPORE_LITE_EXAMPLE_ASCEND_MEM_H
#define MINDSPORE_LITE_EXAMPLE_ASCEND_MEM_H
#include <string>
#include "acl/acl.h"

void *MallocDeviceMemory(size_t data_size) {
  void *device_data = nullptr;
  auto ret = aclrtMalloc(&device_data, data_size, ACL_MEM_MALLOC_NORMAL_ONLY);
  if (ret != ACL_ERROR_NONE) {
    std::cerr << "Malloc device buffer failed, buffer size " << data_size;
    return nullptr;
  }
  return device_data;
}

void FreeDeviceMemory(void *device_data) {
  if (device_data) {
    aclrtFree(device_data);
  }
}

int CopyMemoryHost2Device(void *device_data, size_t dst_size, void *host_data, size_t src_size) {
  auto ret = aclrtMemcpy(device_data, dst_size, host_data, src_size, ACL_MEMCPY_HOST_TO_DEVICE);
  if (ret != ACL_ERROR_NONE) {
    std::cerr << "Acl memcpy host data to device failed, src size: " << src_size << ", dst size: " << dst_size
              << std::endl;
    return -1;
  }
  return 0;
}

int CopyMemoryDevice2Host(void *host_data, size_t dst_size, void *device_data, size_t src_size) {
  auto ret = aclrtMemcpy(host_data, dst_size, device_data, src_size, ACL_MEMCPY_DEVICE_TO_HOST);
  if (ret != ACL_ERROR_NONE) {
    std::cerr << "Acl memcpy device data to host failed, src size: " << src_size << ", dst size: " << dst_size
              << std::endl;
    return -1;
  }
  return 0;
}
#endif  // MINDSPORE_LITE_EXAMPLE_ASCEND_MEM_H
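
The aclrt* helpers above need the Ascend runtime libraries on the loader path at run time; a hedged sketch of the environment setup (the /usr/local/Ascend/latest prefix mirrors the path hard-coded elsewhere in this change and may differ on other machines):

    source /usr/local/Ascend/latest/bin/setenv.bash                  # Ascend toolkit env script (path assumed)
    export LD_LIBRARY_PATH=$LITE_HOME/runtime/lib:$LD_LIBRARY_PATH   # MindSpore Lite runtime libs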
@ -0,0 +1,57 @@
/**
 * Copyright 2022 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef MINDSPORE_LITE_EXAMPLE_GPU_MEM_H
#define MINDSPORE_LITE_EXAMPLE_GPU_MEM_H
#include <cuda_runtime.h>
#include <string>

void *MallocDeviceMemory(size_t data_size) {
  void *device_data = nullptr;
  auto ret = cudaMalloc(&device_data, data_size);
  if (ret != cudaSuccess) {
    std::cerr << "Malloc device buffer failed, buffer size " << data_size;
    return nullptr;
  }
  return device_data;
}

void FreeDeviceMemory(void *device_data) {
  if (device_data) {
    cudaFree(device_data);
  }
}

int CopyMemoryHost2Device(void *device_data, size_t dst_size, void *host_data, size_t src_size) {
  auto ret = cudaMemcpy(device_data, host_data, src_size, cudaMemcpyHostToDevice);
  if (ret != cudaSuccess) {
    std::cerr << "Cuda memcpy host data to device failed, src size: " << src_size << ", dst size: " << dst_size
              << std::endl;
    return -1;
  }
  return 0;
}

int CopyMemoryDevice2Host(void *host_data, size_t dst_size, void *device_data, size_t src_size) {
  auto ret = cudaMemcpy(host_data, device_data, src_size, cudaMemcpyDeviceToHost);
  if (ret != cudaSuccess) {
    std::cerr << "Cuda memcpy device data to host failed, src size: " << src_size << ", dst size: " << dst_size
              << std::endl;
    return -1;
  }
  return 0;
}
#endif  // MINDSPORE_LITE_EXAMPLE_GPU_MEM_H
@ -7,10 +7,10 @@ function PrePareLocal() {
    mkdir -p ${benchmark_test_path}

    cp ./scripts/base_functions.sh ${benchmark_test_path} || exit 1
    cp -r ./python ${benchmark_test_path} || exit 1
    cp ./scripts/ascend/run_converter_ascend.sh ${benchmark_test_path} || exit 1
    cp ./scripts/run_benchmark_python.sh ${benchmark_test_path} || exit 1
    cp ./scripts/ascend/run_benchmark_ascend.sh ${benchmark_test_path} || exit 1
    cp -r ./python ${benchmark_test_path} || exit 1
    cp -r ./cpp ${benchmark_test_path} || exit 1
    cp ./scripts/ascend/*.sh ${benchmark_test_path} || exit 1
    if [[ ${backend} =~ "_cloud" ]]; then
        cp ./../${config_folder}/models_ascend_cloud.cfg ${benchmark_test_path} || exit 1
        cp ./../${config_folder}/models_python_ascend.cfg ${benchmark_test_path} || exit 1
@ -38,11 +38,11 @@ function PrePareRemote() {
    echo "Start to copy remote file"
    ssh ${user_name}@${device_ip} "rm -rf ${benchmark_test_path}; mkdir -p ${benchmark_test_path}" || exit 1

    scp ./scripts/ascend/run_converter_ascend.sh ${user_name}@${device_ip}:${benchmark_test_path} || exit 1
    scp -r ./python ${user_name}@${device_ip}:${benchmark_test_path} || exit 1
    scp ./scripts/ascend/run_benchmark_ascend.sh ${user_name}@${device_ip}:${benchmark_test_path} || exit 1
    scp ./scripts/run_benchmark_python.sh ${user_name}@${device_ip}:${benchmark_test_path} || exit 1
    scp ./scripts/base_functions.sh ${user_name}@${device_ip}:${benchmark_test_path} || exit 1
    scp -r ./python ${user_name}@${device_ip}:${benchmark_test_path} || exit 1
    scp -r ./cpp ${user_name}@${device_ip}:${benchmark_test_path} || exit 1
    scp ./scripts/ascend/*.sh ${user_name}@${device_ip}:${benchmark_test_path} || exit 1
    if [[ ${backend} =~ "_cloud" ]]; then
        scp ./../${config_folder}/models_ascend_cloud.cfg ${user_name}@${device_ip}:${benchmark_test_path} || exit 1
        scp ./../${config_folder}/models_python_ascend.cfg ${user_name}@${device_ip}:${benchmark_test_path} || exit 1
@ -174,4 +174,20 @@ if [[ ${backend} =~ "cloud" ]]; then
    fi
fi

if [[ ${backend} =~ "cloud" ]]; then
    export LITE_ST_MODEL=${model_data_path}/models/hiai/mindspore_uniir_mobilenetv2.mindir
    export LITE_ST_CPP_DIR=${benchmark_test}/cpp
    bash ${benchmark_test}/run_device_mem_test.sh > run_device_mem_test.log
    Run_device_example_status=$?
    if [[ ${Run_device_example_status} != 0 ]];then
        echo "Run device example failed"
        cat run_device_mem_test.log
        exit 1
    else
        echo "Run device example success"
    fi
else
    echo "Skip run device example, while backend is ${backend}"
fi

exit ${Run_benchmark_status}
@ -7,6 +7,8 @@ function Run_Converter() {
    cd ${x86_path}/mindspore-lite-${version}-linux-${arch}/ || exit 1
    cp tools/converter/converter/converter_lite ./ || exit 1
    export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:./tools/converter/lib/
    export LITE_HOME=$(pwd)
    echo "LITE_HOME:${LITE_HOME}"

    # Prepare the config file list
    local ascend_cfg_file_list=("$models_ascend_config")
@ -0,0 +1,26 @@
#!/bin/bash

echo "Begin run run_device_mem_test"
echo "cpp dir: ${LITE_ST_CPP_DIR}"
echo "model path: ${LITE_ST_MODEL}"
echo "lite home: ${LITE_HOME}"

source /usr/local/Ascend/latest/bin/setenv.bash
export LD_LIBRARY_PATH=$LITE_HOME/runtime/lib:$LITE_HOME/tools/converter/lib:$LD_LIBRARY_PATH

cd ${LITE_ST_CPP_DIR}/device_example_cpp || exit 1

bash build.sh Ascend
if [ ! -f "./build/mindspore_quick_start_cpp" ];then
    echo "Failed to build device_example_cpp"
    exit 1
fi

build/mindspore_quick_start_cpp ${LITE_ST_MODEL}
Run_device_example_status=$?
if [[ ${Run_device_example_status} != 0 ]];then
    echo "Run device example failed"
else
    echo "Run device example success"
fi
exit ${Run_device_example_status}
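
A sketch of the environment the test script above expects when run outside the benchmark harness; all paths are placeholders:

    export LITE_HOME=/path/to/mindspore-lite-<version>-linux-x64      # extracted Lite package (assumed)
    export LITE_ST_CPP_DIR=/path/to/st/cpp                            # directory that contains device_example_cpp (assumed)
    export LITE_ST_MODEL=/path/to/mindspore_uniir_mobilenetv2.mindir  # any MindIR model should work (assumed)
    bash run_device_mem_test.sh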
@ -1,13 +1,13 @@
#!/bin/bash

function Run_python_ST() {
    # $1:basePath; $2:whlPath; $3:modelPath; $4:cfgFileList; $5:backend;
    # $1:basePath; $2:whlPath; $3:modelPath; $4:cfgFileList; $5:target;
    base_path=$1
    whl_path=$2
    model_path=$3
    in_data_path=$4
    cfg_file_list=$5
    backend=$6
    target=$6
    mindspore_lite_whl=`ls ${whl_path}/*.whl`
    if [[ -f "${mindspore_lite_whl}" ]]; then
        pip install ${mindspore_lite_whl} --force-reinstall --user || exit 1
@ -41,7 +41,7 @@ function Run_python_ST() {
        done
    fi
    model_file=${model_path}'/'${model_name}'.mindir'
    python test_inference_cloud.py ${model_file} ${input_files} ${input_shapes} ${backend}
    python test_inference_cloud.py ${model_file} ${input_files} ${input_shapes} ${target}
    Run_python_st_status=$?
    if [[ ${Run_python_st_status} != 0 ]];then
        echo "run python model name: ${model_name} failed.";