From 314208633b57babadcabc186f1a8e800f5b2b2b2 Mon Sep 17 00:00:00 2001 From: xuyongfei Date: Tue, 21 Jul 2020 11:19:00 +0800 Subject: [PATCH] serving add acl support, extract common inference interface --- CMakeLists.txt | 6 + build.sh | 17 +- include/infer_log.h | 107 ++++++ include/infer_tensor.h | 191 +++++++++ include/inference.h | 32 +- include/ms_tensor.h | 69 ---- mindspore/ccsrc/CMakeLists.txt | 2 +- .../session/ascend_inference_session.cc | 8 +- .../session/ascend_inference_session.h | 6 +- .../ccsrc/backend/session/infer_session.cc | 362 ++++++++++++++++++ .../ccsrc/backend/session/infer_session.h | 66 ++++ mindspore/ccsrc/backend/session/session.cc | 214 ----------- .../ccsrc/backend/session/session_basic.cc | 2 +- .../ccsrc/backend/session/session_basic.h | 6 +- mindspore/ccsrc/utils/base_ref_utils.cc | 16 +- mindspore/ccsrc/utils/base_ref_utils.h | 5 +- mindspore/core/ir/lite/tensor.cc | 64 ---- mindspore/core/ir/lite/tensor.h | 36 -- mindspore/core/ir/tensor.cc | 63 --- mindspore/core/ir/tensor.h | 35 -- serving/CMakeLists.txt | 45 ++- serving/acl/acl_session.cc | 136 +++++++ .../session.h => serving/acl/acl_session.h | 35 +- serving/acl/model_process.cc | 340 ++++++++++++++++ serving/acl/model_process.h | 74 ++++ serving/core/server.cc | 190 +++------ serving/core/server.h | 15 +- serving/core/serving_tensor.cc | 164 ++++++++ serving/core/serving_tensor.h | 79 ++++ serving/core/util/file_system_operation.cc | 37 +- serving/core/util/option_parser.cc | 3 +- serving/core/version_control/model.cc | 7 +- .../version_control/version_controller.cc | 7 +- serving/cpp_example/CMakeLists.txt | 50 +-- serving/cpp_example/ms_client.cc | 105 ++++- 35 files changed, 1829 insertions(+), 765 deletions(-) create mode 100644 include/infer_log.h create mode 100644 include/infer_tensor.h delete mode 100644 include/ms_tensor.h create mode 100644 mindspore/ccsrc/backend/session/infer_session.cc create mode 100644 mindspore/ccsrc/backend/session/infer_session.h delete mode 100644 mindspore/ccsrc/backend/session/session.cc create mode 100644 serving/acl/acl_session.cc rename mindspore/ccsrc/backend/session/session.h => serving/acl/acl_session.h (52%) create mode 100644 serving/acl/model_process.cc create mode 100644 serving/acl/model_process.h create mode 100644 serving/core/serving_tensor.cc create mode 100644 serving/core/serving_tensor.h diff --git a/CMakeLists.txt b/CMakeLists.txt index c4da105cac..ef07971b1d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -51,6 +51,8 @@ include_directories(${CMAKE_CURRENT_SOURCE_DIR}) include_directories(${CMAKE_CURRENT_SOURCE_DIR}/third_party/flatbuffers/include) include_directories(${CMAKE_CURRENT_SOURCE_DIR}/third_party/flatbuffers/include/flatbuffers) +if (NOT ENABLE_ACL) + include(${CMAKE_SOURCE_DIR}/cmake/dependency_utils.cmake) find_package(Python3 3.7 COMPONENTS Interpreter Development) if(Python3_FOUND) @@ -100,8 +102,12 @@ if (ENABLE_TESTCASES) add_subdirectory(tests) endif() +endif() # NOT ENABLE_ACL + if (ENABLE_SERVING) add_subdirectory(serving) endif() +if (NOT ENABLE_ACL) include(cmake/package.cmake) +endif() # NOT ENABLE_ACL diff --git a/build.sh b/build.sh index 63613a9e6b..adeb099fb7 100755 --- a/build.sh +++ b/build.sh @@ -25,7 +25,7 @@ usage() echo "Usage:" echo "bash build.sh [-d] [-r] [-v] [-c on|off] [-t on|off] [-g on|off] [-h] [-b ge] [-m infer|train] \\" echo " [-a on|off] [-Q on|off] [-p on|off] [-i] [-L] [-R] [-D on|off] [-j[n]] [-e gpu|d|cpu] \\" - echo " [-P on|off] [-z [on|off]] [-M on|off] [-V 9.2|10.1] [-I] [-K] [-B 
on|off] [-E] [-l on|off]" + echo " [-P on|off] [-z [on|off]] [-M on|off] [-V 9.2|10.1] [-I] [-K] [-B on|off] [-w on|off] [-E] [-l on|off]" echo "" echo "Options:" echo " -d Debug mode" @@ -54,6 +54,7 @@ usage() echo " -I Compile predict, default off" echo " -K Compile with AKG, default on" echo " -s Enable serving module, default off" + echo " -w Enable acl module, default off" echo " -B Enable debugger, default off" echo " -E Enable IBVERBS for parameter server, default off" echo " -l Compile with python dependency, default on" @@ -97,12 +98,13 @@ checkopts() PREDICT_PLATFORM="" ENABLE_AKG="on" ENABLE_SERVING="off" + ENABLE_ACL="off" ENABLE_DEBUGGER="off" ENABLE_IBVERBS="off" ENABLE_PYTHON="on" # Process the options - while getopts 'drvj:c:t:hsb:a:g:p:ie:m:l:I:LRP:Q:D:zM:V:K:sB:E' opt + while getopts 'drvj:c:t:hsb:a:g:p:ie:m:l:I:LRP:Q:D:zM:V:K:swB:E' opt do OPTARG=$(echo ${OPTARG} | tr '[A-Z]' '[a-z]') case "${opt}" in @@ -256,6 +258,10 @@ checkopts() ENABLE_SERVING="on" echo "enable serving" ;; + w) + ENABLE_ACL="on" + echo "enable acl" + ;; B) check_on_off $OPTARG B ENABLE_DEBUGGER="on" @@ -348,6 +354,9 @@ build_mindspore() if [[ "X$ENABLE_SERVING" = "Xon" ]]; then CMAKE_ARGS="${CMAKE_ARGS} -DENABLE_SERVING=ON" fi + if [[ "X$ENABLE_ACL" = "Xon" ]]; then + CMAKE_ARGS="${CMAKE_ARGS} -DENABLE_ACL=ON" + fi if [[ "X$ENABLE_DEBUGGER" = "Xon" ]]; then CMAKE_ARGS="${CMAKE_ARGS} -DENABLE_DEBUGGER=ON" fi @@ -362,7 +371,11 @@ build_mindspore() if [[ -n "$VERBOSE" ]]; then CMAKE_VERBOSE="--verbose" fi + if [[ "X$ENABLE_ACL" = "Xon" ]]; then + cmake --build . ${CMAKE_VERBOSE} -j$THREAD_NUM + else cmake --build . --target package ${CMAKE_VERBOSE} -j$THREAD_NUM + fi echo "success to build mindspore project!" } diff --git a/include/infer_log.h b/include/infer_log.h new file mode 100644 index 0000000000..e3607e5500 --- /dev/null +++ b/include/infer_log.h @@ -0,0 +1,107 @@ +/** + * Copyright 2019 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef MINDSPORE_INFERENCE_LOG_H_ +#define MINDSPORE_INFERENCE_LOG_H_ + +#include +#include +#include +#include +#include +#include + +#ifndef ENABLE_ACL +#include "mindspore/ccsrc/utils/log_adapter.h" +namespace mindspore::inference { +#define MSI_LOG(level) MS_LOG(level) + +#define MSI_LOG_DEBUG MSI_LOG(DEBUG) +#define MSI_LOG_INFO MSI_LOG(INFO) +#define MSI_LOG_WARNING MSI_LOG(WARNING) +#define MSI_LOG_ERROR MSI_LOG(ERROR) + +#define MSI_ASSERT(item) MS_ASSERT(item) +} // namespace mindspore::inference + +#else // ENABLE_ACL +#include "acl/acl.h" +namespace mindspore::inference { + +class LogStream { + public: + LogStream() { sstream_ = std::make_shared(); } + ~LogStream() = default; + + template + LogStream &operator<<(const T &val) noexcept { + (*sstream_) << val; + return *this; + } + + LogStream &operator<<(std::ostream &func(std::ostream &os)) noexcept { + (*sstream_) << func; + return *this; + } + + friend class LogWriter; + + private: + std::shared_ptr sstream_; +}; + +template ::value, int>::type = 0> +constexpr std::ostream &operator<<(std::ostream &stream, const T &value) { + return stream << static_cast::type>(value); +} + +class LogWriter { + public: + LogWriter(const char *file, int line, const char *func, aclLogLevel log_level) + : file_(file), line_(line), func_(func), log_level_(log_level) {} + ~LogWriter() = default; + + void operator<(const LogStream &stream) const noexcept __attribute__((visibility("default"))) { + std::ostringstream msg; + msg << stream.sstream_->rdbuf(); + OutputLog(msg); + } + + private: + void OutputLog(const std::ostringstream &msg) const { aclAppLog(log_level_, func_, file_, line_, msg.str().c_str()); } + + const char *file_; + int line_; + const char *func_; + aclLogLevel log_level_; +}; + +#define MSILOG_IF(level) inference::LogWriter(__FILE__, __LINE__, __FUNCTION__, ACL_##level) < inference::LogStream() + +#define MSI_LOG(level) MSI_LOG_##level + +#define MSI_LOG_DEBUG MSILOG_IF(DEBUG) +#define MSI_LOG_INFO MSILOG_IF(INFO) +#define MSI_LOG_WARNING MSILOG_IF(WARNING) +#define MSI_LOG_ERROR MSILOG_IF(ERROR) + +#define MSI_ASSERT(item) + +} // namespace mindspore::inference + +#endif // ENABLE_ACL + +#endif // MINDSPORE_INFERENCE_LOG_H_ diff --git a/include/infer_tensor.h b/include/infer_tensor.h new file mode 100644 index 0000000000..1fce1e1a32 --- /dev/null +++ b/include/infer_tensor.h @@ -0,0 +1,191 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
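The MSI_LOG_* macros above give serving code a single logging front end: in the normal build they forward to MS_LOG from log_adapter.h, while in the ENABLE_ACL build LogWriter routes the accumulated stream to aclAppLog. A minimal usage sketch (the helper function and message text are illustrative only, not part of this change):

#include <string>
#include "include/infer_log.h"

namespace mindspore::inference {
// Logs the outcome of a model load; the same source builds against either backend.
void ReportLoadResult(const std::string &file_name, bool ok) {
  if (!ok) {
    MSI_LOG_ERROR << "load model " << file_name << " failed";
    return;
  }
  MSI_LOG_INFO << "load model " << file_name << " success";
}
}  // namespace mindspore::inference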
+ */ + +#ifndef MINDSPORE_INCLUDE_INFER_TENSOR_H_ +#define MINDSPORE_INCLUDE_INFER_TENSOR_H_ + +#include +#include +#include +#include +#include +#include + +#include "securec/include/securec.h" +#include "include/infer_log.h" + +namespace mindspore { +#define MS_API __attribute__((visibility("default"))) +namespace inference { + +enum DataType { + kMSI_Unknown = 0, + kMSI_Bool = 1, + kMSI_Int8 = 2, + kMSI_Int16 = 3, + kMSI_Int32 = 4, + kMSI_Int64 = 5, + kMSI_Uint8 = 6, + kMSI_Uint16 = 7, + kMSI_Uint32 = 8, + kMSI_Uint64 = 9, + kMSI_Float16 = 10, + kMSI_Float32 = 11, + kMSI_Float64 = 12, +}; + +class InferTensorBase { + public: + InferTensorBase() = default; + virtual ~InferTensorBase() = default; + + virtual DataType data_type() const = 0; + virtual void set_data_type(DataType type) = 0; + virtual std::vector shape() const = 0; + virtual void set_shape(const std::vector &shape) = 0; + virtual const void *data() const = 0; + virtual size_t data_size() const = 0; + virtual bool resize_data(size_t data_len) = 0; + virtual void *mutable_data() = 0; + + bool set_data(const void *data, size_t data_len) { + resize_data(data_len); + if (mutable_data() == nullptr) { + MSI_LOG_ERROR << "set data failed, data len " << data_len; + return false; + } + if (data_size() != data_len) { + MSI_LOG_ERROR << "set data failed, tensor current data size " << data_size() << " not match data len " + << data_len; + return false; + } + if (data_len == 0) { + return true; + } + memcpy_s(mutable_data(), data_size(), data, data_len); + return true; + } + + int64_t ElementNum() const { + std::vector shapex = shape(); + return std::accumulate(shapex.begin(), shapex.end(), 1LL, std::multiplies()); + } + + int GetTypeSize(DataType type) const { + const std::map type_size_map{ + {kMSI_Bool, sizeof(bool)}, {kMSI_Float64, sizeof(double)}, {kMSI_Int8, sizeof(int8_t)}, + {kMSI_Uint8, sizeof(uint8_t)}, {kMSI_Int16, sizeof(int16_t)}, {kMSI_Uint16, sizeof(uint16_t)}, + {kMSI_Int32, sizeof(int32_t)}, {kMSI_Uint32, sizeof(uint32_t)}, {kMSI_Int64, sizeof(int64_t)}, + {kMSI_Uint64, sizeof(uint64_t)}, {kMSI_Float16, sizeof(uint16_t)}, {kMSI_Float32, sizeof(float)}, + }; + auto it = type_size_map.find(type); + if (it != type_size_map.end()) { + return it->second; + } + return 0; + } +}; + +class InferTensor : public InferTensorBase { + public: + DataType type_; + std::vector shape_; + std::vector data_; + + public: + InferTensor() = default; + InferTensor(DataType type, std::vector shape, const void *data, size_t data_len) { + set_data_type(type); + set_shape(shape); + set_data(data, data_len); + } + + void set_data_type(DataType type) override { type_ = type; } + DataType data_type() const override { return type_; } + + void set_shape(const std::vector &shape) override { shape_ = shape; } + std::vector shape() const override { return shape_; } + + const void *data() const override { return data_.data(); } + size_t data_size() const override { return data_.size(); } + + bool resize_data(size_t data_len) override { + data_.resize(data_len); + return true; + } + void *mutable_data() override { return data_.data(); } +}; + +class RequestBase { + public: + virtual size_t size() const = 0; + virtual const InferTensorBase *operator[](size_t index) const = 0; +}; + +class ReplyBase { + public: + virtual size_t size() const = 0; + virtual InferTensorBase *operator[](size_t index) = 0; + virtual const InferTensorBase *operator[](size_t index) const = 0; + virtual InferTensorBase *add() = 0; + virtual void clear() = 0; +}; + +class 
VectorInferTensorWrapReply : public ReplyBase { + public: + explicit VectorInferTensorWrapReply(std::vector &tensor_list) : tensor_list_(tensor_list) {} + + size_t size() const { return tensor_list_.size(); } + InferTensorBase *operator[](size_t index) { + if (index >= tensor_list_.size()) { + MSI_LOG_ERROR << "visit invalid index " << index << " total size " << tensor_list_.size(); + return nullptr; + } + return &(tensor_list_[index]); + } + const InferTensorBase *operator[](size_t index) const { + if (index >= tensor_list_.size()) { + MSI_LOG_ERROR << "visit invalid index " << index << " total size " << tensor_list_.size(); + return nullptr; + } + return &(tensor_list_[index]); + } + InferTensorBase *add() { + tensor_list_.push_back(InferTensor()); + return &(tensor_list_.back()); + } + void clear() { tensor_list_.clear(); } + std::vector &tensor_list_; +}; + +class VectorInferTensorWrapRequest : public RequestBase { + public: + explicit VectorInferTensorWrapRequest(const std::vector &tensor_list) : tensor_list_(tensor_list) {} + + size_t size() const { return tensor_list_.size(); } + const InferTensorBase *operator[](size_t index) const { + if (index >= tensor_list_.size()) { + MSI_LOG_ERROR << "visit invalid index " << index << " total size " << tensor_list_.size(); + return nullptr; + } + return &(tensor_list_[index]); + } + const std::vector &tensor_list_; +}; + +} // namespace inference +} // namespace mindspore +#endif // MINDSPORE_INCLUDE_INFER_TENSOR_H_ diff --git a/include/inference.h b/include/inference.h index 9aac37f323..b8665db1fa 100644 --- a/include/inference.h +++ b/include/inference.h @@ -20,28 +20,32 @@ #include #include #include -#include "include/ms_tensor.h" +#include "include/infer_tensor.h" namespace mindspore { -class FuncGraph; namespace inference { -using VectorForMSTensorPtr = std::vector>; -class MS_API MSSession { + +class MS_API InferSession { public: - MSSession() = default; + InferSession() = default; + virtual ~InferSession() = default; + virtual bool InitEnv(const std::string &device_type, uint32_t device_id) = 0; + virtual bool FinalizeEnv() = 0; + virtual bool LoadModelFromFile(const std::string &file_name, uint32_t &model_id) = 0; + virtual bool UnloadModel(uint32_t model_id) = 0; + // override this method to avoid request/reply data copy + virtual bool ExecuteModel(uint32_t model_id, const RequestBase &request, ReplyBase &reply) = 0; - static std::shared_ptr CreateSession(const std::string &device, uint32_t device_id); + virtual bool ExecuteModel(uint32_t model_id, const std::vector &inputs, + std::vector &outputs) { + VectorInferTensorWrapRequest request(inputs); + VectorInferTensorWrapReply reply(outputs); + return ExecuteModel(model_id, request, reply); + } - virtual uint32_t CompileGraph(std::shared_ptr funcGraphPtr) = 0; - - virtual MultiTensor RunGraph(uint32_t graph_id, const VectorForMSTensorPtr &inputs) = 0; - - virtual bool CheckModelInputs(uint32_t graph_id, const VectorForMSTensorPtr &inputs) const = 0; + static std::shared_ptr CreateSession(const std::string &device, uint32_t device_id); }; -std::shared_ptr MS_API LoadModel(const char *model_buf, size_t size, const std::string &device); - -void MS_API ExitInference(); } // namespace inference } // namespace mindspore #endif // MINDSPORE_INCLUDE_MS_SESSION_H diff --git a/include/ms_tensor.h b/include/ms_tensor.h deleted file mode 100644 index fc59e12328..0000000000 --- a/include/ms_tensor.h +++ /dev/null @@ -1,69 +0,0 @@ -/** - * Copyright 2020 Huawei Technologies Co., Ltd - * - * 
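The InferSession interface above replaces MSSession: CreateSession constructs and initializes a backend-specific session (the MindSpore graph backend in infer_session.cc, or the ACL backend under serving/acl), LoadModelFromFile returns a model id, and the non-pure ExecuteModel overload wraps plain InferTensor vectors in the VectorInferTensorWrap adapters, so only callers that need to avoid request/reply copies implement RequestBase/ReplyBase directly. A minimal caller sketch, assuming shape elements are int64_t and using a hypothetical model path and input shape:

#include <memory>
#include <vector>
#include "include/inference.h"

int RunOnce() {
  using namespace mindspore::inference;
  // CreateSession initializes the device environment internally (InitEnv).
  std::shared_ptr<InferSession> session = InferSession::CreateSession("Ascend", 0);
  if (session == nullptr) {
    return -1;
  }
  uint32_t model_id = 0;
  if (!session->LoadModelFromFile("/path/to/model.mindir", model_id)) {  // hypothetical path
    session->FinalizeEnv();
    return -1;
  }
  // Hypothetical single float32 input of shape [1, 3, 224, 224].
  std::vector<float> image(1 * 3 * 224 * 224, 0.0f);
  InferTensor input(kMSI_Float32, {1, 3, 224, 224}, image.data(), image.size() * sizeof(float));
  std::vector<InferTensor> inputs{input};
  std::vector<InferTensor> outputs;
  bool ok = session->ExecuteModel(model_id, inputs, outputs);
  session->UnloadModel(model_id);
  session->FinalizeEnv();
  return ok ? 0 : -1;
}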
Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef MINDSPORE_INCLUDE_MS_TENSOR_H_ -#define MINDSPORE_INCLUDE_MS_TENSOR_H_ - -#include -#include -#include -#include "mindspore/core/ir/dtype/type_id.h" - -namespace mindspore { -#define MS_API __attribute__((visibility("default"))) -namespace inference { -class MS_API MSTensor { - public: - MSTensor() = default; - // brief Create a MSTensor pointer. - // - // param data_type DataTypeId of tensor to be created. - // param shape Shape of tensor to be created. - // return MSTensor pointer. - static MSTensor *CreateTensor(TypeId data_type, const std::vector &shape); - - ~MSTensor() = default; - - virtual TypeId data_type() const = 0; - - virtual TypeId set_data_type(const TypeId data_type) = 0; - - virtual std::vector shape() const = 0; - - virtual size_t set_shape(const std::vector &shape) = 0; - - virtual int DimensionSize(size_t index) const = 0; - // brief Get number of element in MSTensor. - // - // return Number of element in MSTensor. - virtual int ElementsNum() const = 0; - - virtual std::size_t hash() const = 0; - // brief Get byte size of data in MSTensor. - // - // return Byte size of data in MSTensor. - virtual size_t Size() const = 0; - // brief Get pointer of data in MSTensor. - // - // The data pointer can be used to both write or read data in MSTensor. - // - // return A pointer points to data in MSTensor. 
- virtual void *MutableData() const = 0; -}; -using MultiTensor = std::vector>; -} // namespace inference -} // namespace mindspore -#endif // MINDSPORE_INCLUDE_MS_TENSOR_H_ diff --git a/mindspore/ccsrc/CMakeLists.txt b/mindspore/ccsrc/CMakeLists.txt index 5b05091cc2..c967c3f7f1 100644 --- a/mindspore/ccsrc/CMakeLists.txt +++ b/mindspore/ccsrc/CMakeLists.txt @@ -297,7 +297,7 @@ set(LOAD_ONNX_SRC ${CMAKE_CURRENT_SOURCE_DIR}/utils/load_onnx/anf_model_parser.cc ) add_library(inference SHARED - ${CMAKE_CURRENT_SOURCE_DIR}/backend/session/session.cc + ${CMAKE_CURRENT_SOURCE_DIR}/backend/session/infer_session.cc ${LOAD_ONNX_SRC} ) target_link_libraries(inference PRIVATE ${PYTHON_LIBRARIES} ${SECUREC_LIBRARY} diff --git a/mindspore/ccsrc/backend/session/ascend_inference_session.cc b/mindspore/ccsrc/backend/session/ascend_inference_session.cc index 0999bfada7..69bb7de3cc 100644 --- a/mindspore/ccsrc/backend/session/ascend_inference_session.cc +++ b/mindspore/ccsrc/backend/session/ascend_inference_session.cc @@ -88,8 +88,7 @@ GraphId AscendInferenceSession::CompileGraph(NotNull func_graph) { return graph_id; } -bool AscendInferenceSession::CheckModelInputs(uint32_t graph_id, - const std::vector > &inputs) { +bool AscendInferenceSession::CheckModelInputs(uint32_t graph_id, const std::vector &inputs) const { MS_LOG(INFO) << "Start check client inputs, graph id : " << graph_id; auto kernel_graph = GetGraph(graph_id); MS_EXCEPTION_IF_NULL(kernel_graph); @@ -119,8 +118,7 @@ bool AscendInferenceSession::CheckModelInputs(uint32_t graph_id, return true; } -bool AscendInferenceSession::CompareInput(const std::shared_ptr &input, - const ParameterPtr ¶meter) { +bool AscendInferenceSession::CompareInput(const tensor::TensorPtr &input, const ParameterPtr ¶meter) const { MS_EXCEPTION_IF_NULL(input); MS_EXCEPTION_IF_NULL(parameter); // compare dims @@ -155,7 +153,7 @@ bool AscendInferenceSession::CompareInput(const std::shared_ptr shape) { +std::string AscendInferenceSession::PrintInputShape(std::vector shape) const { string res = "["; for (auto dim : shape) { res += " " + std::to_string(dim); diff --git a/mindspore/ccsrc/backend/session/ascend_inference_session.h b/mindspore/ccsrc/backend/session/ascend_inference_session.h index 00ff91d590..7f4f478002 100644 --- a/mindspore/ccsrc/backend/session/ascend_inference_session.h +++ b/mindspore/ccsrc/backend/session/ascend_inference_session.h @@ -39,9 +39,9 @@ class AscendInferenceSession : public AscendSession { void LoadInputData(const std::shared_ptr &kernel_graph, const std::vector &inputs_const) const; GraphId CompileGraph(NotNull func_graph) override; - bool CheckModelInputs(uint32_t graph_id, const std::vector> &inputs) override; - bool CompareInput(const std::shared_ptr &input, const ParameterPtr ¶meter); - std::string PrintInputShape(std::vector shape); + bool CheckModelInputs(uint32_t graph_id, const std::vector &inputs) const override; + bool CompareInput(const tensor::TensorPtr &input, const ParameterPtr ¶meter) const; + std::string PrintInputShape(std::vector shape) const; }; MS_REG_SESSION(kDavinciInferenceDevice, AscendInferenceSession); } // namespace session diff --git a/mindspore/ccsrc/backend/session/infer_session.cc b/mindspore/ccsrc/backend/session/infer_session.cc new file mode 100644 index 0000000000..0eef030565 --- /dev/null +++ b/mindspore/ccsrc/backend/session/infer_session.cc @@ -0,0 +1,362 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this 
file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "backend/session/infer_session.h" +#include +#include +#include "include/inference.h" +#include "utils/load_onnx/anf_converter.h" +#include "backend/session/session_basic.h" +#include "backend/session/session_factory.h" +#include "utils/base_ref_utils.h" +#include "backend/kernel_compiler/oplib/oplib.h" + +#ifdef ENABLE_D +#include "utils/context/ms_context.h" +#endif + +using std::string; +using std::vector; + +namespace py = pybind11; +namespace mindspore::inference { + +std::shared_ptr InferSession::CreateSession(const std::string &device, uint32_t device_id) { + try { + auto session = std::make_shared(); + bool ret = session->InitEnv(device, device_id); + if (!ret) { + return nullptr; + } + return session; + } catch (std::bad_alloc &e) { + MS_LOG(ERROR) << "Inference CreatSession failed, failed to alloc memory"; + return nullptr; + } +} + +MSInferSession::MSInferSession() = default; +MSInferSession::~MSInferSession() = default; + +std::shared_ptr> MSInferSession::ReadFile(const std::string &file) { + if (file.empty()) { + MS_LOG(ERROR) << "file is nullptr"; + return nullptr; + } + std::string realPath = file; + std::ifstream ifs(realPath); + if (!ifs.good()) { + MS_LOG(ERROR) << "file: " << realPath << " is not exist"; + return nullptr; + } + + if (!ifs.is_open()) { + MS_LOG(ERROR) << "file: " << realPath << "open failed"; + return nullptr; + } + + ifs.seekg(0, std::ios::end); + size_t size = ifs.tellg(); + std::shared_ptr> buf(new (std::nothrow) std::vector(size)); + if (buf == nullptr) { + MS_LOG(ERROR) << "malloc buf failed, file: " << realPath; + ifs.close(); + return nullptr; + } + + ifs.seekg(0, std::ios::beg); + ifs.read(buf->data(), size); + ifs.close(); + + return buf; +} + +bool MSInferSession::LoadModelFromFile(const std::string &file_name, uint32_t &model_id) { + auto graphBuf = ReadFile(file_name); + if (graphBuf == nullptr) { + MS_LOG(ERROR) << "Read model file failed, file name is " << file_name.c_str(); + return false; + } + auto graph = LoadModel(graphBuf->data(), graphBuf->size(), device_type_); + if (graph == nullptr) { + MS_LOG(ERROR) << "Load graph model failed, file name is " << file_name.c_str(); + return false; + } + bool ret = CompileGraph(graph, model_id); + if (!ret) { + MS_LOG(ERROR) << "Compile graph model failed, file name is " << file_name.c_str(); + return false; + } + MS_LOG(INFO) << "Load model from file " << file_name << " success"; + +#ifdef ENABLE_D + // set d context + rtError_t rt_ret = rtCtxGetCurrent(&context_); + if (rt_ret != RT_ERROR_NONE || context_ == nullptr) { + MS_LOG(ERROR) << "the ascend device context is null"; + return false; + } +#endif + + return true; +} + +bool MSInferSession::UnloadModel(uint32_t model_id) { return true; } + +tensor::TensorPtr ServingTensor2MSTensor(const InferTensorBase &out_tensor) { + std::vector shape; + for (auto dim : out_tensor.shape()) { + shape.push_back(static_cast(dim)); + } + TypeId data_type; + const std::map type2id_map{ + {inference::kMSI_Unknown, TypeId::kNumberTypeBegin}, {inference::kMSI_Bool, 
TypeId::kNumberTypeBool}, + {inference::kMSI_Int8, TypeId::kNumberTypeInt8}, {inference::kMSI_Uint8, TypeId::kNumberTypeUInt8}, + {inference::kMSI_Int16, TypeId::kNumberTypeInt16}, {inference::kMSI_Uint16, TypeId::kNumberTypeUInt16}, + {inference::kMSI_Int32, TypeId::kNumberTypeInt32}, {inference::kMSI_Uint32, TypeId::kNumberTypeUInt32}, + {inference::kMSI_Int64, TypeId::kNumberTypeInt64}, {inference::kMSI_Uint64, TypeId::kNumberTypeUInt64}, + {inference::kMSI_Float16, TypeId::kNumberTypeFloat16}, {inference::kMSI_Float32, TypeId::kNumberTypeFloat32}, + {inference::kMSI_Float64, TypeId::kNumberTypeFloat64}, + }; + auto it = type2id_map.find(out_tensor.data_type()); + if (it == type2id_map.end()) { + MSI_LOG_WARNING << "undefined MSI data type " << out_tensor.data_type(); + return nullptr; + } else { + data_type = it->second; + } + + auto ms_tensor = std::make_shared(data_type, shape); + memcpy_s(ms_tensor->data_c(), ms_tensor->Size(), out_tensor.data(), out_tensor.data_size()); + return ms_tensor; +} + +void MSTensor2ServingTensor(tensor::TensorPtr ms_tensor, InferTensorBase &out_tensor) { + vector shape; + for (auto dim : ms_tensor->shape()) { + shape.push_back(dim); + } + out_tensor.set_shape(shape); + + const std::map id2type_map{ + {TypeId::kNumberTypeBegin, inference::kMSI_Unknown}, {TypeId::kNumberTypeBool, inference::kMSI_Bool}, + {TypeId::kNumberTypeFloat64, inference::kMSI_Float64}, {TypeId::kNumberTypeInt8, inference::kMSI_Int8}, + {TypeId::kNumberTypeUInt8, inference::kMSI_Uint8}, {TypeId::kNumberTypeInt16, inference::kMSI_Int16}, + {TypeId::kNumberTypeUInt16, inference::kMSI_Uint16}, {TypeId::kNumberTypeInt32, inference::kMSI_Int32}, + {TypeId::kNumberTypeUInt32, inference::kMSI_Uint32}, {TypeId::kNumberTypeInt64, inference::kMSI_Int64}, + {TypeId::kNumberTypeUInt64, inference::kMSI_Uint64}, {TypeId::kNumberTypeFloat16, inference::kMSI_Float16}, + {TypeId::kNumberTypeFloat32, inference::kMSI_Float32}, + }; + auto it = id2type_map.find(ms_tensor->data_type()); + if (it == id2type_map.end()) { + MSI_LOG_WARNING << "undefined MS data type " << ms_tensor->data_type(); + out_tensor.set_data_type(inference::kMSI_Unknown); + } else { + out_tensor.set_data_type(it->second); + } + out_tensor.set_data(ms_tensor->data_c(), ms_tensor->Size()); +} + +bool MSInferSession::ExecuteModel(uint32_t model_id, const RequestBase &request, ReplyBase &reply) { +#ifdef ENABLE_D + if (context_ == nullptr) { + MS_LOG(ERROR) << "rtCtx is nullptr"; + return false; + } + rtError_t rt_ret = rtCtxSetCurrent(context_); + if (rt_ret != RT_ERROR_NONE) { + MS_LOG(ERROR) << "set Ascend rtCtx failed"; + return false; + } +#endif + + vector inputs; + for (size_t i = 0; i < request.size(); i++) { + if (request[i] == nullptr) { + MS_LOG(ERROR) << "Execute Model " << model_id << " Failed, input tensor is null, index " << i; + return false; + } + auto input = ServingTensor2MSTensor(*request[i]); + if (input == nullptr) { + MS_LOG(ERROR) << "Tensor convert failed"; + return false; + } + inputs.push_back(input); + } + if (!CheckModelInputs(model_id, inputs)) { + MS_LOG(ERROR) << "Check Model " << model_id << " Inputs Failed"; + return false; + } + vector outputs = RunGraph(model_id, inputs); + if (outputs.empty()) { + MS_LOG(ERROR) << "Execute Model " << model_id << " Failed"; + return false; + } + reply.clear(); + for (const auto &tensor : outputs) { + auto out_tensor = reply.add(); + if (out_tensor == nullptr) { + MS_LOG(ERROR) << "Execute Model " << model_id << " Failed, add output tensor failed"; + return false; + } + 
MSTensor2ServingTensor(tensor, *out_tensor); + } + return true; +} + +bool MSInferSession::FinalizeEnv() { + auto ms_context = MsContext::GetInstance(); + if (ms_context == nullptr) { + MS_LOG(ERROR) << "Get Context failed!"; + return false; + } + if (!ms_context->CloseTsd()) { + MS_LOG(ERROR) << "Inference CloseTsd failed!"; + return false; + } + return true; +} + +std::shared_ptr MSInferSession::LoadModel(const char *model_buf, size_t size, const std::string &device) { + try { + auto anf_graph = lite::AnfConverter::RunAnfConverter(model_buf, size); + return anf_graph; + } catch (std::exception &e) { + MS_LOG(ERROR) << "Inference LoadModel failed"; + return nullptr; + } +} + +void MSInferSession::RegAllOp() { + static std::mutex init_mutex; + static bool Initialized = false; + + std::lock_guard lock(init_mutex); + if (Initialized) { + return; + } + Initialized = true; + MsContext::GetInstance()->set_execution_mode(kGraphMode); + Py_Initialize(); + auto c_expression = PyImport_ImportModule("mindspore._c_expression"); + if (c_expression == nullptr) { + MS_LOG(EXCEPTION) << "Failed to import mindspore._c_expression module."; + return; + } + PyObject *c_expression_dict = PyModule_GetDict(c_expression); + + PyObject *op_info_loader_class = PyDict_GetItemString(c_expression_dict, "OpInfoLoaderPy"); + if (op_info_loader_class == nullptr) { + MS_LOG(EXCEPTION) << "Failed to get op_info_loader_class from mindspore._c_expression."; + return; + } + PyObject *op_info_loader = PyInstanceMethod_New(op_info_loader_class); + if (op_info_loader == nullptr) { + MS_LOG(EXCEPTION) << "Failed to create op_info_loader instance."; + return; + } + PyObject *op_info_loader_ins = PyObject_CallObject(op_info_loader, nullptr); + if (op_info_loader_ins == nullptr) { + MS_LOG(EXCEPTION) << "Failed to call op_info_loader instance."; + return; + } + auto all_ops_info_vector_addr_ul = PyObject_CallMethod(op_info_loader_ins, "get_all_ops_info", nullptr); + if (all_ops_info_vector_addr_ul == nullptr) { + MS_LOG(EXCEPTION) << "Failed to call get_all_ops_addr."; + return; + } + auto all_ops_info_vector_addr = PyLong_AsVoidPtr(all_ops_info_vector_addr_ul); + auto all_ops_info = static_cast *>(all_ops_info_vector_addr); + for (auto op_info : *all_ops_info) { + kernel::OpLib::RegOpInfo(std::shared_ptr(op_info)); + } + all_ops_info->clear(); + delete all_ops_info; + Py_DECREF(op_info_loader); + Py_DECREF(op_info_loader_class); + Py_DECREF(c_expression_dict); + Py_DECREF(c_expression); + return; +} + +bool MSInferSession::CompileGraph(std::shared_ptr funcGraphPtr, uint32_t &model_id) { + MS_ASSERT(session_impl_ != nullptr); + try { + auto graph_id = session_impl_->CompileGraph(NOT_NULL(funcGraphPtr)); + py::gil_scoped_release gil_release; + model_id = graph_id; + return true; + } catch (std::exception &e) { + MS_LOG(ERROR) << "Inference CompileGraph failed"; + return false; + } +} + +std::vector MSInferSession::RunGraph(uint32_t graph_id, + const std::vector &inputs) { + try { + VectorRef outputs; + session_impl_->RunGraph(graph_id, inputs, &outputs); + + return TransformVectorRefToMultiTensor(outputs); + } catch (std::exception &e) { + MS_LOG(ERROR) << "Inference Rungraph failed"; + return std::vector(); + } +} + +string MSInferSession::AjustTargetName(const std::string &device) { + if (device == kAscendDevice) { + return std::string(kAscendDevice) + "Inference"; + } else { + MS_LOG(ERROR) << "Only support device Ascend right now"; + return ""; + } +} + +bool MSInferSession::InitEnv(const std::string &device, uint32_t device_id) 
{ + RegAllOp(); + auto ms_context = MsContext::GetInstance(); + ms_context->set_execution_mode(kGraphMode); + ms_context->set_device_id(device_id); + auto ajust_device = AjustTargetName(device); + if (ajust_device == "") { + return false; + } + ms_context->set_device_target(device); + session_impl_ = session::SessionFactory::Get().Create(ajust_device); + if (session_impl_ == nullptr) { + MS_LOG(ERROR) << "Session create failed!, please make sure target device:" << device << " is available."; + return false; + } + session_impl_->Init(device_id); + if (ms_context == nullptr) { + MS_LOG(ERROR) << "Get Context failed!"; + return false; + } + if (!ms_context->OpenTsd()) { + MS_LOG(ERROR) << "Session init OpenTsd failed!"; + return false; + } + return true; +} + +bool MSInferSession::CheckModelInputs(uint32_t graph_id, const std::vector &inputs) const { + MS_ASSERT(session_impl_ != nullptr); + return session_impl_->CheckModelInputs(graph_id, inputs); +} + +} // namespace mindspore::inference diff --git a/mindspore/ccsrc/backend/session/infer_session.h b/mindspore/ccsrc/backend/session/infer_session.h new file mode 100644 index 0000000000..edebd8834f --- /dev/null +++ b/mindspore/ccsrc/backend/session/infer_session.h @@ -0,0 +1,66 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#ifndef MINDSPORE_CCSRC_SESSION_SESSION_H +#define MINDSPORE_CCSRC_SESSION_SESSION_H + +#include +#include +#include +#include +#include +#include + +#include "backend/session/session_basic.h" +#include "ir/anf.h" +#include "include/inference.h" + +#ifdef ENABLE_D +#include "runtime/context.h" +#endif + +namespace mindspore { +namespace inference { +class MSInferSession : public InferSession { + public: + MSInferSession(); + ~MSInferSession(); + + bool InitEnv(const std::string &device_type, uint32_t device_id) override; + bool FinalizeEnv() override; + bool LoadModelFromFile(const std::string &file_name, uint32_t &model_id) override; + bool UnloadModel(uint32_t model_id) override; + bool ExecuteModel(uint32_t model_id, const RequestBase &inputs, ReplyBase &outputs) override; + + private: + std::shared_ptr session_impl_ = nullptr; + std::vector graph_id_; + std::string device_type_; + int32_t device_id_; +#ifdef ENABLE_D + rtContext_t context_ = nullptr; +#endif + + std::shared_ptr LoadModel(const char *model_buf, size_t size, const std::string &device); + std::shared_ptr> ReadFile(const std::string &file); + static void RegAllOp(); + string AjustTargetName(const std::string &device); + bool CompileGraph(std::shared_ptr funcGraphPtr, uint32_t &model_id); + bool CheckModelInputs(uint32_t graph_id, const std::vector &inputs) const; + std::vector RunGraph(uint32_t graph_id, const std::vector &inputs); +}; +} // namespace inference +} // namespace mindspore +#endif // MINDSPORE_CCSRC_SESSION_SESSION_BASIC_H diff --git a/mindspore/ccsrc/backend/session/session.cc b/mindspore/ccsrc/backend/session/session.cc deleted file mode 100644 index b5c9c69537..0000000000 --- a/mindspore/ccsrc/backend/session/session.cc +++ /dev/null @@ -1,214 +0,0 @@ -/** - * Copyright 2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include -#include -#include "include/inference.h" -#include "backend/session/session.h" -#include "utils/load_onnx/anf_converter.h" -#include "backend/session/session_basic.h" -#include "backend/session/session_factory.h" -#include "utils/base_ref_utils.h" -#include "backend/kernel_compiler/oplib/oplib.h" -#ifdef ENABLE_D -#include "utils/context/ms_context.h" -#include "backend/session/ascend_session.h" -#else -#include "backend/session/cpu_session.h" -#endif - -namespace py = pybind11; -namespace mindspore::inference { -std::shared_ptr LoadModel(const char *model_buf, size_t size, const std::string &device) { - try { - inference::Session::RegAllOp(); - auto anf_graph = lite::AnfConverter::RunAnfConverter(model_buf, size); - return anf_graph; - } catch (std::exception &e) { - MS_LOG(ERROR) << "Inference LoadModel failed"; - return nullptr; - } -} - -void ExitInference() { - auto ms_context = MsContext::GetInstance(); - if (ms_context == nullptr) { - MS_LOG(ERROR) << "Get Context failed!"; - return; - } - if (!ms_context->CloseTsd()) { - MS_LOG(ERROR) << "Inference CloseTsd failed!"; - return; - } -} - -std::shared_ptr MSSession::CreateSession(const std::string &device, uint32_t device_id) { - try { - auto session = std::make_shared(); - auto ret = session->Init(device, device_id); - if (ret != 0) { - return nullptr; - } - return session; - } catch (std::exception &e) { - MS_LOG(ERROR) << "Inference CreatSession failed"; - return nullptr; - } -} - -void Session::RegAllOp() { - static std::mutex init_mutex; - static bool Initialized = false; - - std::lock_guard lock(init_mutex); - if (Initialized) { - return; - } - Initialized = true; - MsContext::GetInstance()->set_execution_mode(kGraphMode); - Py_Initialize(); - auto c_expression = PyImport_ImportModule("mindspore._c_expression"); - if (c_expression == nullptr) { - MS_LOG(EXCEPTION) << "Failed to import mindspore._c_expression module."; - return; - } - PyObject *c_expression_dict = PyModule_GetDict(c_expression); - - PyObject *op_info_loader_class = PyDict_GetItemString(c_expression_dict, "OpInfoLoaderPy"); - if (op_info_loader_class == nullptr) { - MS_LOG(EXCEPTION) << "Failed to get op_info_loader_class from mindspore._c_expression."; - return; - } - PyObject *op_info_loader = PyInstanceMethod_New(op_info_loader_class); - if (op_info_loader == nullptr) { - MS_LOG(EXCEPTION) << "Failed to create op_info_loader instance."; - return; - } - PyObject *op_info_loader_ins = PyObject_CallObject(op_info_loader, nullptr); - if (op_info_loader_ins == nullptr) { - MS_LOG(EXCEPTION) << "Failed to call op_info_loader instance."; - return; - } - auto all_ops_info_vector_addr_ul = PyObject_CallMethod(op_info_loader_ins, "get_all_ops_info", nullptr); - if (all_ops_info_vector_addr_ul == nullptr) { - MS_LOG(EXCEPTION) << "Failed to call get_all_ops_addr."; - return; - } - auto all_ops_info_vector_addr = PyLong_AsVoidPtr(all_ops_info_vector_addr_ul); - auto all_ops_info = static_cast *>(all_ops_info_vector_addr); - for (auto op_info : *all_ops_info) { - kernel::OpLib::RegOpInfo(std::shared_ptr(op_info)); - } - all_ops_info->clear(); - delete all_ops_info; - Py_DECREF(op_info_loader); - Py_DECREF(op_info_loader_class); - Py_DECREF(c_expression_dict); - Py_DECREF(c_expression); - return; -} - -uint32_t Session::CompileGraph(std::shared_ptr funcGraphPtr) { - MS_ASSERT(session_impl_ != nullptr); - try { - auto graph_id = session_impl_->CompileGraph(NOT_NULL(funcGraphPtr)); - py::gil_scoped_release gil_release; - return graph_id; - } catch 
(std::exception &e) { - MS_LOG(ERROR) << "Inference CompileGraph failed"; - return static_cast(-1); - } -} - -MultiTensor Session::RunGraph(uint32_t graph_id, const std::vector> &inputs) { - try { - std::vector inTensors; - inTensors.resize(inputs.size()); - bool has_error = false; - std::transform(inputs.begin(), inputs.end(), inTensors.begin(), - [&has_error](const std::shared_ptr &tensor_ptr) -> tensor::TensorPtr { - if (tensor_ptr == nullptr) { - MS_LOG(WARNING) << "input MSTensor is nullptr, return nullptr"; - has_error = true; - return nullptr; - } - auto tensor = static_cast(tensor_ptr.get()); - if (tensor == nullptr) { - MS_LOG(ERROR) << "Can not cast input MSTensor to tensor"; - has_error = true; - return nullptr; - } - return tensor->tensor(); - }); - if (has_error) { - MS_LOG(ERROR) << "Init Tensor failed, returning empty result"; - std::vector> multiTensor; - return multiTensor; - } - VectorRef outputs; - session_impl_->RunGraph(graph_id, inTensors, &outputs); - - return TransformVectorRefToMultiTensor(outputs); - } catch (std::exception &e) { - MS_LOG(ERROR) << "Inference Rungraph failed"; - return MultiTensor(); - } -} -namespace { -string AjustTargetName(const std::string &device) { - if (device == kAscendDevice) { - return std::string(kAscendDevice) + "Inference"; - } else { - MS_LOG(ERROR) << "Only support device Ascend right now"; - return ""; - } -} -} // namespace -int Session::Init(const std::string &device, uint32_t device_id) { - RegAllOp(); - auto ms_context = MsContext::GetInstance(); - ms_context->set_execution_mode(kGraphMode); - ms_context->set_device_id(device_id); - auto ajust_device = AjustTargetName(device); - if (ajust_device == "") { - return -1; - } - ms_context->set_device_target(device); - session_impl_ = session::SessionFactory::Get().Create(ajust_device); - if (session_impl_ == nullptr) { - MS_LOG(ERROR) << "Session create failed!, please make sure target device:" << device << " is available."; - return -1; - } - session_impl_->Init(device_id); - if (ms_context == nullptr) { - MS_LOG(ERROR) << "Get Context failed!"; - return -1; - } - if (!ms_context->OpenTsd()) { - MS_LOG(ERROR) << "Session init OpenTsd failed!"; - return -1; - } - return 0; -} - -bool Session::CheckModelInputs(uint32_t graph_id, - const std::vector> &inputs) const { - MS_ASSERT(session_impl_ != nullptr); - return session_impl_->CheckModelInputs(graph_id, inputs); -} - -Session::Session() = default; -} // namespace mindspore::inference diff --git a/mindspore/ccsrc/backend/session/session_basic.cc b/mindspore/ccsrc/backend/session/session_basic.cc index 91939d493a..e18fa40600 100644 --- a/mindspore/ccsrc/backend/session/session_basic.cc +++ b/mindspore/ccsrc/backend/session/session_basic.cc @@ -276,7 +276,7 @@ bool ExistSummaryNode(const KernelGraph *graph) { GraphId SessionBasic::graph_sum_ = 0; -KernelGraphPtr SessionBasic::GetGraph(mindspore::GraphId graph_id) { +KernelGraphPtr SessionBasic::GetGraph(mindspore::GraphId graph_id) const { auto it = graphs_.find(graph_id); if (it == graphs_.end()) { MS_LOG(WARNING) << "Can't find graph " << graph_id; diff --git a/mindspore/ccsrc/backend/session/session_basic.h b/mindspore/ccsrc/backend/session/session_basic.h index 99e6a2b08d..367b1fe80a 100755 --- a/mindspore/ccsrc/backend/session/session_basic.h +++ b/mindspore/ccsrc/backend/session/session_basic.h @@ -106,9 +106,7 @@ class SessionBasic { virtual void GetSummaryNodes(KernelGraph *graph); void AssignParamKey(const KernelGraphPtr &kernel_graph); void InitPSParamAndOptim(const 
KernelGraphPtr &kernel_graph, const std::vector &inputs_const); - virtual bool CheckModelInputs(uint32_t graph_id, const std::vector> &inputs) { - return true; - } + virtual bool CheckModelInputs(uint32_t graph_id, const std::vector &inputs) const { return true; } #ifdef ENABLE_DEBUGGER // set debugger @@ -120,7 +118,7 @@ class SessionBasic { protected: // Get graph by graph id ,if not exist return null ptr - KernelGraphPtr GetGraph(GraphId graph_id); + KernelGraphPtr GetGraph(GraphId graph_id) const; virtual void LoadInputData(const std::shared_ptr &kernel_graph, const std::vector &inputs_const) const; void UpdateOutputs(const std::shared_ptr &kernel_graph, VectorRef *const outputs, diff --git a/mindspore/ccsrc/utils/base_ref_utils.cc b/mindspore/ccsrc/utils/base_ref_utils.cc index 87089c6266..2c1223a13e 100644 --- a/mindspore/ccsrc/utils/base_ref_utils.cc +++ b/mindspore/ccsrc/utils/base_ref_utils.cc @@ -17,17 +17,17 @@ #include #include #include "utils/base_ref_utils.h" -#include "include/ms_tensor.h" +#include "include/infer_tensor.h" #include "ir/tensor.h" namespace mindspore { -void IterateFindTensor(std::vector> *msTensors, const VectorRef &ref_list) { + +void IterateFindTensor(std::vector *msTensors, const VectorRef &ref_list) { for (size_t i = 0; i < ref_list.size(); ++i) { if (utils::isa(ref_list[i])) { auto tensor_ptr = utils::cast>(ref_list[i]); MS_EXCEPTION_IF_NULL(tensor_ptr); - auto tensor = new inference::Tensor(tensor_ptr); - msTensors->emplace_back(std::shared_ptr(tensor)); + msTensors->emplace_back(tensor_ptr); } else if (utils::isa(ref_list[i])) { auto ref_iter = utils::cast(ref_list[i]); IterateFindTensor(msTensors, ref_iter); @@ -37,19 +37,19 @@ void IterateFindTensor(std::vector> *msTens } } -std::vector> TransformVectorRefToMultiTensor(const VectorRef &base_ref) { - std::vector> msTensors; +std::vector TransformVectorRefToMultiTensor(const VectorRef &base_ref) { + std::vector msTensors; if (utils::isa(base_ref)) { auto ref_list = utils::cast(base_ref); IterateFindTensor(&msTensors, ref_list); } else if (utils::isa(base_ref)) { auto tensor_ptr = utils::cast>(base_ref); MS_EXCEPTION_IF_NULL(tensor_ptr); - auto tensor = new inference::Tensor(tensor_ptr); - msTensors.emplace_back(std::shared_ptr(tensor)); + msTensors.emplace_back(tensor_ptr); } else { MS_LOG(EXCEPTION) << "The output is not a base ref list or a tensor!"; } return msTensors; } + } // namespace mindspore diff --git a/mindspore/ccsrc/utils/base_ref_utils.h b/mindspore/ccsrc/utils/base_ref_utils.h index 2503eab738..04a6fcefda 100644 --- a/mindspore/ccsrc/utils/base_ref_utils.h +++ b/mindspore/ccsrc/utils/base_ref_utils.h @@ -17,11 +17,12 @@ #include #include #include "utils/base_ref.h" -#include "include/ms_tensor.h" +#include "include/infer_tensor.h" +#include "ir/tensor.h" #ifndef MINDSPORE_CCSRC_UTILS_BASE_REF_UTILS_H #define MINDSPORE_CCSRC_UTILS_BASE_REF_UTILS_H namespace mindspore { -std::vector> TransformVectorRefToMultiTensor(const VectorRef &base_ref); +std::vector TransformVectorRefToMultiTensor(const VectorRef &base_ref); } // namespace mindspore #endif // MINDSPORE_CCSRC_UTILS_BASE_REF_UTILS_H diff --git a/mindspore/core/ir/lite/tensor.cc b/mindspore/core/ir/lite/tensor.cc index 2957495aa4..9c3921eadd 100644 --- a/mindspore/core/ir/lite/tensor.cc +++ b/mindspore/core/ir/lite/tensor.cc @@ -85,68 +85,4 @@ bool Tensor::operator==(const Value &other) const { } } } // namespace tensor - -namespace inference { -MSTensor *MSTensor::CreateTensor(TypeId data_type, const std::vector &shape) { - return new 
Tensor(data_type, shape); -} - -Tensor::Tensor() { this->tensor_impl_ = std::make_shared(); } - -Tensor::Tensor(TypeId data_type, const std::vector &shape) { - this->tensor_impl_ = std::make_shared(data_type, shape); -} - -Tensor::Tensor(std::shared_ptr tensor_ptr) { this->tensor_impl_ = std::move(tensor_ptr); } - -TypeId Tensor::data_type() const { - MS_ASSERT(this->tensor_impl_ != nullptr); - return this->tensor_impl_->data_type(); -} - -TypeId Tensor::set_data_type(TypeId data_type) { - MS_ASSERT(this->tensor_impl_ != nullptr); - return this->tensor_impl_->set_data_type(data_type); -} - -std::vector Tensor::shape() const { - MS_ASSERT(this->tensor_impl_ != nullptr); - return this->tensor_impl_->shape(); -} - -size_t Tensor::set_shape(const std::vector &shape) { - MS_ASSERT(this->tensor_impl_ != nullptr); - return this->tensor_impl_->set_shape(shape); -} - -int Tensor::DimensionSize(size_t index) const { - MS_ASSERT(this->tensor_impl_ != nullptr); - return this->tensor_impl_->DimensionSize(index); -} - -int Tensor::ElementsNum() const { - MS_ASSERT(this->tensor_impl_ != nullptr); - return this->tensor_impl_->ElementsNum(); -} - -std::size_t Tensor::hash() const { - MS_ASSERT(this->tensor_impl_ != nullptr); - return this->tensor_impl_->hash(); -} - -std::shared_ptr Tensor::tensor() const { - MS_ASSERT(this->tensor_impl_ != nullptr); - return this->tensor_impl_; -} - -size_t Tensor::Size() const { - MS_ASSERT(this->tensor_impl_ != nullptr); - return this->tensor_impl_->Size(); -} - -void *Tensor::MutableData() const { - MS_ASSERT(this->tensor_impl_ != nullptr); - return this->tensor_impl_->data(); -} -} // namespace inference } // namespace mindspore diff --git a/mindspore/core/ir/lite/tensor.h b/mindspore/core/ir/lite/tensor.h index 0e180f2421..7644f7d84f 100644 --- a/mindspore/core/ir/lite/tensor.h +++ b/mindspore/core/ir/lite/tensor.h @@ -56,42 +56,6 @@ class Tensor : public MetaTensor { using TensorPtr = std::shared_ptr; } // namespace tensor - -namespace inference { -class Tensor : public MSTensor { - public: - Tensor(); - - Tensor(TypeId data_type, const std::vector &shape); - - explicit Tensor(std::shared_ptr tensor_ptr); - - ~Tensor() = default; - - TypeId data_type() const override; - - TypeId set_data_type(const TypeId data_type) override; - - std::vector shape() const override; - - size_t set_shape(const std::vector &shape) override; - - int DimensionSize(size_t index) const override; - - int ElementsNum() const override; - - std::size_t hash() const override; - - std::shared_ptr tensor() const; - - size_t Size() const override; - - void *MutableData() const override; - - protected: - std::shared_ptr tensor_impl_; -}; -} // namespace inference } // namespace mindspore #endif // MINDSPORE_CORE_IR_LITE_TENSOR_H_ diff --git a/mindspore/core/ir/tensor.cc b/mindspore/core/ir/tensor.cc index c04c2cca96..0dca092a1d 100644 --- a/mindspore/core/ir/tensor.cc +++ b/mindspore/core/ir/tensor.cc @@ -454,67 +454,4 @@ TypeId Tensor::set_data_type(const TypeId data_type) { return data_type; } } // namespace tensor - -namespace inference { -MSTensor *MSTensor::CreateTensor(TypeId data_type, const std::vector &shape) { - return new Tensor(data_type, shape); -} - -Tensor::Tensor(TypeId data_type, const std::vector &shape) { - this->tensor_impl_ = std::make_shared(data_type, shape); -} - -Tensor::Tensor(std::shared_ptr tensor_ptr) { this->tensor_impl_ = std::move(tensor_ptr); } - -TypeId Tensor::data_type() const { - MS_ASSERT(this->tensor_impl_ != nullptr); - return this->tensor_impl_->data_type(); 
-} - -TypeId Tensor::set_data_type(TypeId data_type) { - MS_ASSERT(this->tensor_impl_ != nullptr); - return this->tensor_impl_->set_data_type(data_type); -} - -std::vector Tensor::shape() const { - MS_ASSERT(this->tensor_impl_ != nullptr); - return this->tensor_impl_->shape(); -} - -size_t Tensor::set_shape(const std::vector &shape) { - MS_ASSERT(this->tensor_impl_ != nullptr); - return this->tensor_impl_->set_shape(shape); -} - -int Tensor::DimensionSize(size_t index) const { - MS_ASSERT(this->tensor_impl_ != nullptr); - return this->tensor_impl_->DimensionSize(index); -} - -int Tensor::ElementsNum() const { - MS_ASSERT(this->tensor_impl_ != nullptr); - return this->tensor_impl_->ElementsNum(); -} - -std::size_t Tensor::hash() const { - MS_ASSERT(this->tensor_impl_ != nullptr); - return this->tensor_impl_->hash(); -} - -std::shared_ptr Tensor::tensor() const { - MS_ASSERT(this->tensor_impl_ != nullptr); - return this->tensor_impl_; -} - -size_t Tensor::Size() const { - MS_ASSERT(this->tensor_impl_ != nullptr); - return this->tensor_impl_->data().nbytes(); -} - -void *Tensor::MutableData() const { - MS_ASSERT(this->tensor_impl_ != nullptr); - return this->tensor_impl_->data_c(); -} - -} // namespace inference } // namespace mindspore diff --git a/mindspore/core/ir/tensor.h b/mindspore/core/ir/tensor.h index d7d8da6039..ce319e7443 100644 --- a/mindspore/core/ir/tensor.h +++ b/mindspore/core/ir/tensor.h @@ -25,7 +25,6 @@ #include "Eigen/Core" #include "ir/device_sync.h" #include "ir/meta_tensor.h" -#include "include/ms_tensor.h" #include "utils/log_adapter.h" using float16 = Eigen::half; @@ -237,40 +236,6 @@ class Tensor : public MetaTensor { using TensorPtr = std::shared_ptr; using TensorPtrList = std::vector>; } // namespace tensor - -namespace inference { -class Tensor : public MSTensor { - public: - Tensor(TypeId data_type, const std::vector &shape); - - explicit Tensor(std::shared_ptr tensor_ptr); - - ~Tensor() = default; - - TypeId data_type() const override; - - TypeId set_data_type(const TypeId data_type) override; - - std::vector shape() const override; - - size_t set_shape(const std::vector &shape) override; - - int DimensionSize(size_t index) const override; - - int ElementsNum() const override; - - std::size_t hash() const override; - - std::shared_ptr tensor() const; - - size_t Size() const override; - - void *MutableData() const override; - - protected: - std::shared_ptr tensor_impl_; -}; -} // namespace inference } // namespace mindspore #endif // MINDSPORE_CORE_IR_TENSOR_H_ diff --git a/serving/CMakeLists.txt b/serving/CMakeLists.txt index fdd56fffc4..c9c41d4f89 100644 --- a/serving/CMakeLists.txt +++ b/serving/CMakeLists.txt @@ -13,19 +13,19 @@ add_library(protobuf::libprotobuf ALIAS protobuf::protobuf) add_executable(protobuf::libprotoc ALIAS protobuf::protoc) set(_PROTOBUF_LIBPROTOBUF protobuf::libprotobuf) -if(CMAKE_CROSSCOMPILING) +if (CMAKE_CROSSCOMPILING) find_program(_PROTOBUF_PROTOC protoc) -else() +else () set(_PROTOBUF_PROTOC $) -endif() +endif () # Find gRPC installation # Looks for gRPCConfig.cmake file installed by gRPC's cmake installation. 
if (EXISTS ${grpc_ROOT}/lib64) set(gRPC_DIR "${grpc_ROOT}/lib64/cmake/grpc") -else() +else () set(gRPC_DIR "${grpc_ROOT}/lib/cmake/grpc") -endif() +endif () message("serving using grpc_DIR : " ${gPRC_DIR}) find_package(gRPC CONFIG REQUIRED) @@ -34,11 +34,11 @@ message(STATUS "Using gRPC ${gRPC_VERSION}") set(_GRPC_GRPCPP gRPC::grpc++) set(_REFLECTION gRPC::grpc++_reflection) -if(CMAKE_CROSSCOMPILING) +if (CMAKE_CROSSCOMPILING) find_program(_GRPC_CPP_PLUGIN_EXECUTABLE grpc_cpp_plugin) -else() +else () set(_GRPC_CPP_PLUGIN_EXECUTABLE $) -endif() +endif () # Proto file get_filename_component(hw_proto "ms_service.proto" ABSOLUTE) @@ -67,11 +67,36 @@ file(GLOB_RECURSE CORE_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} list(APPEND SERVING_SRC "main.cc" ${hw_proto_srcs} ${hw_grpc_srcs} ${CORE_SRC_LIST}) +option(ENABLE_ACL "enable acl" OFF) + +if (ENABLE_ACL) + if (DEFINED ENV{ASCEND_CUSTOM_PATH}) + set(ASCEND_PATH $ENV{ASCEND_CUSTOM_PATH}) + else () + set(ASCEND_PATH /usr/local/Ascend) + endif () + set(ACL_LIB_DIR ${ASCEND_PATH}/acllib/) + MESSAGE("acl lib dir " ${ACL_LIB_DIR}) + + include_directories(${ACL_LIB_DIR}/include/) + file(GLOB_RECURSE ACL_SESSION_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "acl/*.cc") + list(APPEND SERVING_SRC ${ACL_SESSION_SRC_LIST}) +endif () + include_directories(${CMAKE_BINARY_DIR}) add_executable(ms_serving ${SERVING_SRC}) -target_link_libraries(ms_serving inference mindspore_gvar) + target_link_libraries(ms_serving ${_REFLECTION} ${_GRPC_GRPCPP} ${_PROTOBUF_LIBPROTOBUF} pthread) if (ENABLE_D) add_compile_definitions(ENABLE_D) target_link_libraries(ms_serving ${RUNTIME_LIB}) -endif() +endif () + +if (ENABLE_ACL) + add_compile_definitions(ENABLE_ACL) + set(ALC_LIB_SO ${ACL_LIB_DIR}/lib64/libruntime.so ${ACL_LIB_DIR}/lib64/libascendcl.so + ${ACL_LIB_DIR}/lib64/libacl_retr.so ${ACL_LIB_DIR}/lib64/libacl_cblas.so) + target_link_libraries(ms_serving ${ALC_LIB_SO}) +else () + target_link_libraries(ms_serving inference mindspore_gvar) +endif () diff --git a/serving/acl/acl_session.cc b/serving/acl/acl_session.cc new file mode 100644 index 0000000000..dcbb43689d --- /dev/null +++ b/serving/acl/acl_session.cc @@ -0,0 +1,136 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include "serving/acl/acl_session.h" +#include "include/infer_log.h" + +namespace mindspore::inference { + +std::shared_ptr InferSession::CreateSession(const std::string &device, uint32_t device_id) { + try { + auto session = std::make_shared(); + auto ret = session->InitEnv(device, device_id); + if (!ret) { + return nullptr; + } + return session; + } catch (std::exception &e) { + MSI_LOG_ERROR << "Inference CreatSession failed"; + return nullptr; + } +} + +bool AclSession::LoadModelFromFile(const std::string &file_name, uint32_t &model_id) { + return model_process_.LoadModelFromFile(file_name, model_id); +} + +bool AclSession::UnloadModel(uint32_t model_id) { + model_process_.UnLoad(); + return true; +} + +bool AclSession::ExecuteModel(uint32_t model_id, const RequestBase &request, + ReplyBase &reply) { // set d context + aclError rt_ret = aclrtSetCurrentContext(context_); + if (rt_ret != ACL_ERROR_NONE) { + MSI_LOG_ERROR << "set the ascend device context failed"; + return false; + } + return model_process_.Execute(request, reply); +} + +bool AclSession::InitEnv(const std::string &device_type, uint32_t device_id) { + device_type_ = device_type; + device_id_ = device_id; + auto ret = aclInit(nullptr); + if (ret != ACL_ERROR_NONE) { + MSI_LOG_ERROR << "Execute aclInit Failed"; + return false; + } + MSI_LOG_INFO << "acl init success"; + + ret = aclrtSetDevice(device_id_); + if (ret != ACL_ERROR_NONE) { + MSI_LOG_ERROR << "acl open device " << device_id_ << " failed"; + return false; + } + MSI_LOG_INFO << "open device " << device_id_ << " success"; + + ret = aclrtCreateContext(&context_, device_id_); + if (ret != ACL_ERROR_NONE) { + MSI_LOG_ERROR << "acl create context failed"; + return false; + } + MSI_LOG_INFO << "create context success"; + + ret = aclrtCreateStream(&stream_); + if (ret != ACL_ERROR_NONE) { + MSI_LOG_ERROR << "acl create stream failed"; + return false; + } + MSI_LOG_INFO << "create stream success"; + + aclrtRunMode run_mode; + ret = aclrtGetRunMode(&run_mode); + if (ret != ACL_ERROR_NONE) { + MSI_LOG_ERROR << "acl get run mode failed"; + return false; + } + bool is_device = (run_mode == ACL_DEVICE); + model_process_.SetIsDevice(is_device); + MSI_LOG_INFO << "get run mode success is device input/output " << is_device; + + MSI_LOG_INFO << "Init acl success, device id " << device_id_; + return true; +} + +bool AclSession::FinalizeEnv() { + aclError ret; + if (stream_ != nullptr) { + ret = aclrtDestroyStream(stream_); + if (ret != ACL_ERROR_NONE) { + MSI_LOG_ERROR << "destroy stream failed"; + } + stream_ = nullptr; + } + MSI_LOG_INFO << "end to destroy stream"; + if (context_ != nullptr) { + ret = aclrtDestroyContext(context_); + if (ret != ACL_ERROR_NONE) { + MSI_LOG_ERROR << "destroy context failed"; + } + context_ = nullptr; + } + MSI_LOG_INFO << "end to destroy context"; + + ret = aclrtResetDevice(device_id_); + if (ret != ACL_ERROR_NONE) { + MSI_LOG_ERROR << "reset devie " << device_id_ << " failed"; + } + MSI_LOG_INFO << "end to reset device " << device_id_; + + ret = aclFinalize(); + if (ret != ACL_ERROR_NONE) { + MSI_LOG_ERROR << "finalize acl failed"; + } + MSI_LOG_INFO << "end to finalize acl"; + return true; +} + +AclSession::AclSession() = default; +} // namespace mindspore::inference diff --git a/mindspore/ccsrc/backend/session/session.h b/serving/acl/acl_session.h similarity index 52% rename from mindspore/ccsrc/backend/session/session.h rename to serving/acl/acl_session.h index 9d8a385828..206cf4b7fc 100644 --- 
a/mindspore/ccsrc/backend/session/session.h +++ b/serving/acl/acl_session.h @@ -13,8 +13,8 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#ifndef MINDSPORE_CCSRC_BACKEND_SESSION_SESSION_H -#define MINDSPORE_CCSRC_BACKEND_SESSION_SESSION_H +#ifndef MINDSPORE_SERVING_ACL_SESSION_H +#define MINDSPORE_SERVING_ACL_SESSION_H #include #include @@ -23,31 +23,28 @@ #include #include -#include "backend/session/session_basic.h" -#include "ir/anf.h" #include "include/inference.h" +#include "serving/acl/model_process.h" namespace mindspore { namespace inference { -class Session : public MSSession { +class AclSession : public InferSession { public: - Session(); + AclSession(); - uint32_t CompileGraph(std::shared_ptr funcGraphPtr) override; - - MultiTensor RunGraph(uint32_t graph_id, const std::vector> &inputs) override; - - bool CheckModelInputs(uint32_t graph_id, - const std::vector> &inputs) const override; - - int Init(const std::string &device, uint32_t device_id); - - static void RegAllOp(); + bool InitEnv(const std::string &device_type, uint32_t device_id) override; + bool FinalizeEnv() override; + bool LoadModelFromFile(const std::string &file_name, uint32_t &model_id) override; + bool UnloadModel(uint32_t model_id) override; + bool ExecuteModel(uint32_t model_id, const RequestBase &request, ReplyBase &reply) override; private: - std::shared_ptr session_impl_ = nullptr; - std::vector graph_id_; + std::string device_type_; + int32_t device_id_; + aclrtStream stream_ = nullptr; + aclrtContext context_ = nullptr; + ModelProcess model_process_; }; } // namespace inference } // namespace mindspore -#endif // MINDSPORE_CCSRC_BACKEND_SESSION_SESSION_BASIC_H +#endif // MINDSPORE_SERVING_ACL_SESSION_H diff --git a/serving/acl/model_process.cc b/serving/acl/model_process.cc new file mode 100644 index 0000000000..a76539e72f --- /dev/null +++ b/serving/acl/model_process.cc @@ -0,0 +1,340 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "serving/acl/model_process.h" +#include <vector> +#include <unordered_map> + +#include "include/infer_log.h" + +namespace mindspore { +namespace inference { + +bool ModelProcess::LoadModelFromFile(const std::string &file_name, uint32_t &model_id) { + aclError acl_ret = aclmdlLoadFromFile(file_name.c_str(), &model_id); + if (acl_ret != ACL_ERROR_NONE) { + MSI_LOG_ERROR << "Read model file failed, file name is " << file_name; + return false; + } + MSI_LOG_INFO << "Load model success " << file_name; + + model_desc_ = aclmdlCreateDesc(); + acl_ret = aclmdlGetDesc(model_desc_, model_id); + if (acl_ret != ACL_ERROR_NONE) { + MSI_LOG_ERROR << "Read model desc failed"; + return false; + } + bool ret = InitInputsBuffer(); + if (!ret) { + MSI_LOG_ERROR << "Create input buffer failed"; + return false; + } + ret = InitOutputsBuffer(); + if (!ret) { + MSI_LOG_ERROR << "Create output buffer failed"; + return false; + } + model_id_ = model_id; + return true; +} + +bool ModelProcess::InitInputsBuffer() { + aclError ret; + inputs_ = aclmdlCreateDataset(); + if (inputs_ == nullptr) { + MSI_LOG_ERROR << "Create input dataset failed"; + return false; + } + size_t input_size = aclmdlGetNumInputs(model_desc_); + + for (size_t i = 0; i < input_size; ++i) { + auto buffer_size = aclmdlGetInputSizeByIndex(model_desc_, i); + void *data_mem_buffer = nullptr; + if (!is_run_on_device_) { // need to copy input/output to/from device + ret = aclrtMalloc(&data_mem_buffer, buffer_size, ACL_MEM_MALLOC_NORMAL_ONLY); + if (ret != ACL_ERROR_NONE) { + MSI_LOG_ERROR << "Malloc device input buffer failed, input size " << buffer_size; + return false; + } + } + + aclmdlIODims dims; + ret = aclmdlGetInputDims(model_desc_, i, &dims); + if (ret != ACL_ERROR_NONE) { + MSI_LOG_ERROR << "Get input shape failed"; + return false; + } + aclDataType dataType = aclmdlGetInputDataType(model_desc_, i); + std::vector<int64_t> shape(dims.dims, dims.dims + dims.dimCount); + input_infos_.emplace_back(AclTensorInfo{data_mem_buffer, buffer_size, dataType, shape}); + } + MSI_LOG_INFO << "Create model inputs success"; + return true; +} + +bool ModelProcess::CreateDataBuffer(void *&data_mem_buffer, size_t buffer_size, aclmdlDataset *dataset) { + aclError ret; + auto free_data_buffer = [this](void *dataMemBuffer) { + if (!is_run_on_device_) { + aclrtFree(dataMemBuffer); + } else { + aclrtFreeHost(dataMemBuffer); + } + }; + if (!is_run_on_device_) { + ret = aclrtMalloc(&data_mem_buffer, buffer_size, ACL_MEM_MALLOC_NORMAL_ONLY); + if (ret != ACL_ERROR_NONE) { + MSI_LOG_ERROR << "Malloc device buffer failed, buffer size " << buffer_size; + return false; + } + } else { + ret = aclrtMallocHost(&data_mem_buffer, buffer_size); + if (ret != ACL_ERROR_NONE) { + MSI_LOG_ERROR << "Malloc host buffer failed, buffer size " << buffer_size; + return false; + } + } + + auto data_buffer = aclCreateDataBuffer(data_mem_buffer, buffer_size); + if (data_buffer == nullptr) { + MSI_LOG_ERROR << "Create Data Buffer failed"; + free_data_buffer(data_mem_buffer); + return false; + } + ret = aclmdlAddDatasetBuffer(dataset, data_buffer); + if (ret != ACL_ERROR_NONE) { + MSI_LOG_ERROR << "add data buffer failed"; + free_data_buffer(data_mem_buffer); + aclDestroyDataBuffer(data_buffer); + return false; + } + return true; +} + +bool ModelProcess::InitOutputsBuffer() { + aclError ret; + outputs_ = aclmdlCreateDataset(); + if (outputs_ == nullptr) { + MSI_LOG_ERROR << "Create output dataset failed"; + return false; + } + size_t output_size = aclmdlGetNumOutputs(model_desc_); + for (size_t i = 0; i <
output_size; ++i) { + auto buffer_size = aclmdlGetOutputSizeByIndex(model_desc_, i); + + void *data_mem_buffer = nullptr; + if (CreateDataBuffer(data_mem_buffer, buffer_size, outputs_) != true) { + MSI_LOG_ERROR << "add output data buffer failed, buffer size " << buffer_size; + return false; + } + aclmdlIODims dims; + ret = aclmdlGetOutputDims(model_desc_, i, &dims); + if (ret != ACL_ERROR_NONE) { + MSI_LOG_ERROR << "Get input shape failed"; + return false; + } + aclDataType dataType = aclmdlGetOutputDataType(model_desc_, i); + std::vector shape(dims.dims, dims.dims + dims.dimCount); + output_infos_.emplace_back(AclTensorInfo{data_mem_buffer, buffer_size, dataType, shape}); + } + MSI_LOG_INFO << "Create model output success"; + return true; +} + +void ModelProcess::DestroyInputsDataset() { + if (inputs_ == nullptr) { + return; + } + for (size_t i = 0; i < aclmdlGetDatasetNumBuffers(inputs_); i++) { + auto dataBuffer = aclmdlGetDatasetBuffer(inputs_, i); + aclDestroyDataBuffer(dataBuffer); + } + aclmdlDestroyDataset(inputs_); + inputs_ = nullptr; +} + +void ModelProcess::DestroyInputsDataMem() { + if (!is_run_on_device_) { + for (const auto &item : input_infos_) { + aclrtFree(item.device_data); + } + } + input_infos_.clear(); +} + +void ModelProcess::DestroyInputsBuffer() { + DestroyInputsDataset(); + DestroyInputsDataMem(); +} + +void ModelProcess::DestroyOutputsBuffer() { + if (outputs_ == nullptr) { + return; + } + for (size_t i = 0; i < aclmdlGetDatasetNumBuffers(outputs_); i++) { + auto dataBuffer = aclmdlGetDatasetBuffer(outputs_, i); + auto data = aclGetDataBufferAddr(dataBuffer); + if (!is_run_on_device_) { + aclrtFree(data); + } else { + aclrtFreeHost(data); + } + aclDestroyDataBuffer(dataBuffer); + } + aclmdlDestroyDataset(outputs_); + outputs_ = nullptr; + output_infos_.clear(); +} + +void ModelProcess::UnLoad() { + auto ret = aclmdlUnload(model_id_); + if (ret != ACL_ERROR_NONE) { + MSI_LOG_ERROR << "Unload model failed"; + } + if (model_desc_ != nullptr) { + aclmdlDestroyDesc(model_desc_); + model_desc_ = nullptr; + } + DestroyInputsBuffer(); + DestroyOutputsBuffer(); + MSI_LOG_INFO << "End unload model " << model_id_; +} + +bool ModelProcess::CheckAndInitInput(const RequestBase &request) { + aclError ret; + inputs_ = aclmdlCreateDataset(); + // check inputs + if (request.size() != input_infos_.size()) { + MSI_LOG_ERROR << "inputs count not match, required count " << input_infos_.size() << ", given count " + << request.size(); + return false; + } + for (size_t i = 0; i < input_infos_.size(); i++) { + if (request[i] == nullptr) { + MSI_LOG_ERROR << "input " << i << " cannot be null"; + return false; + } + if (request[i]->data_size() != input_infos_[i].buffer_size) { + MSI_LOG_ERROR << "input " << i << " data size not match, required size " << input_infos_[i].buffer_size + << ", given count " << request[i]->data_size(); + return false; + } + } + // copy inputs + for (size_t i = 0; i < input_infos_.size(); i++) { + void *input_buffer = nullptr; + auto &info = input_infos_[i]; + const void *data = request[i]->data(); + if (!is_run_on_device_) { + ret = aclrtMemcpy(info.device_data, info.buffer_size, data, request[i]->data_size(), ACL_MEMCPY_HOST_TO_DEVICE); + if (ret != ACL_ERROR_NONE) { + MSI_LOG_ERROR << "memcpy input " << i << " data to device failed, buffer size " << request[i]->data_size(); + return false; + } + input_buffer = info.device_data; + } else { + input_buffer = const_cast(data); + } + auto data_buffer = aclCreateDataBuffer(input_buffer, info.buffer_size); + if 
(data_buffer == nullptr) { + MSI_LOG_ERROR << "Create Data Buffer failed"; + return false; + } + ret = aclmdlAddDatasetBuffer(inputs_, data_buffer); + if (ret != ACL_ERROR_NONE) { + MSI_LOG_ERROR << "add data buffer failed"; + aclDestroyDataBuffer(data_buffer); + return false; + } + } + return true; +} + +bool ModelProcess::BuildOutputs(ReplyBase &reply) { + aclError ret; + // copy outputs + reply.clear(); + + std::unordered_map<aclDataType, inference::DataType> dataTypeMap = { + {ACL_FLOAT16, inference::kMSI_Float16}, {ACL_FLOAT, inference::kMSI_Float32}, {ACL_DOUBLE, inference::kMSI_Float64}, + {ACL_INT8, inference::kMSI_Int8}, {ACL_INT16, inference::kMSI_Int16}, {ACL_INT32, inference::kMSI_Int32}, + {ACL_INT64, inference::kMSI_Int64}, {ACL_UINT8, inference::kMSI_Uint8}, {ACL_UINT16, inference::kMSI_Uint16}, + {ACL_UINT32, inference::kMSI_Uint32}, {ACL_UINT64, inference::kMSI_Uint64}, {ACL_BOOL, inference::kMSI_Bool}, + }; + auto trans_to_serving_type = [&dataTypeMap](aclDataType data_type) { + auto it = dataTypeMap.find(data_type); + if (it == dataTypeMap.end()) { + return inference::kMSI_Unknown; + } else { + return it->second; + } + }; + for (size_t i = 0; i < output_infos_.size(); i++) { + auto &info = output_infos_[i]; + auto output = reply.add(); + if (output == nullptr) { + MSI_LOG_ERROR << "add new output failed"; + return false; + } + output->set_data_type(trans_to_serving_type(info.data_type)); + output->set_shape(info.dims); + if (!output->resize_data(info.buffer_size)) { + MSI_LOG_ERROR << "new output data buffer failed, data size " << info.buffer_size; + return false; + } + if (!is_run_on_device_) { + ret = aclrtMemcpy(output->mutable_data(), output->data_size(), info.device_data, info.buffer_size, + ACL_MEMCPY_DEVICE_TO_HOST); + if (ret != ACL_ERROR_NONE) { + MSI_LOG_ERROR << "Memcpy output " << i << " to host failed, memory size " << info.buffer_size; + return false; + } + } else { + ret = aclrtMemcpy(output->mutable_data(), output->data_size(), info.device_data, info.buffer_size, + ACL_MEMCPY_HOST_TO_HOST); + if (ret != ACL_ERROR_NONE) { + MSI_LOG_ERROR << "Memcpy output " << i << " to host failed, memory size " << info.buffer_size; + return false; + } + } + } + return true; +} + +bool ModelProcess::Execute(const RequestBase &request, ReplyBase &reply) { + aclError acl_ret; + if (CheckAndInitInput(request) != true) { + MSI_LOG_ERROR << "check or init input failed"; + DestroyInputsDataset(); + return false; + } + acl_ret = aclmdlExecute(model_id_, inputs_, outputs_); + DestroyInputsDataset(); + if (acl_ret != ACL_ERROR_NONE) { + MSI_LOG_ERROR << "Execute Model Failed"; + return false; + } + bool ret = BuildOutputs(reply); + if (!ret) { + MSI_LOG_ERROR << "Build outputs failed"; + return false; + } + MSI_LOG_INFO << "execute model success"; + return true; +} + +} // namespace inference +} // namespace mindspore diff --git a/serving/acl/model_process.h b/serving/acl/model_process.h new file mode 100644 index 0000000000..61bf72574a --- /dev/null +++ b/serving/acl/model_process.h @@ -0,0 +1,74 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef INC_MODEL_PROCESS_ACL +#define INC_MODEL_PROCESS_ACL +#include <string> +#include <vector> +#include "acl/acl.h" +#include "acl/acl_mdl.h" +#include "acl/acl_rt.h" +#include "serving/core/util/status.h" +#include "include/inference.h" + +namespace mindspore { +namespace inference { + +struct AclTensorInfo { + void *device_data; + size_t buffer_size; + aclDataType data_type; + std::vector<int64_t> dims; +}; + +class ModelProcess { + public: + ModelProcess() {} + ~ModelProcess() {} + + bool LoadModelFromFile(const std::string &file_name, uint32_t &model_id); + void UnLoad(); + + // override this method to avoid request/reply data copy + bool Execute(const RequestBase &request, ReplyBase &reply); + + void SetIsDevice(bool is_device) { is_run_on_device_ = is_device; } + + private: + uint32_t model_id_ = 0xffffffff; + bool is_run_on_device_ = false; + aclmdlDesc *model_desc_ = nullptr; + aclmdlDataset *inputs_ = nullptr; + aclmdlDataset *outputs_ = nullptr; + std::vector<AclTensorInfo> input_infos_; + std::vector<AclTensorInfo> output_infos_; + + bool CreateDataBuffer(void *&data_mem_buffer, size_t buffer_size, aclmdlDataset *dataset); + bool CheckAndInitInput(const RequestBase &request); + bool BuildOutputs(ReplyBase &reply); + + bool InitInputsBuffer(); + bool InitOutputsBuffer(); + void DestroyInputsDataset(); + void DestroyInputsDataMem(); + void DestroyInputsBuffer(); + void DestroyOutputsBuffer(); +}; + +} // namespace inference +} // namespace mindspore + +#endif diff --git a/serving/core/server.cc b/serving/core/server.cc index 0ec2385f94..8b58789655 100644 --- a/serving/core/server.cc +++ b/serving/core/server.cc @@ -23,14 +23,14 @@ #include #include #include +#include <chrono> -#include "mindspore/ccsrc/utils/log_adapter.h" +#include "include/infer_log.h" #include "serving/ms_service.grpc.pb.h" #include "core/util/option_parser.h" #include "core/version_control/version_controller.h" -#include "mindspore/ccsrc/utils/context/ms_context.h" #include "core/util/file_system_operation.h" -#include "graphengine/third_party/fwkacllib/inc/runtime/context.h" +#include "core/serving_tensor.h" using ms_serving::MSService; using ms_serving::PredictReply; @@ -38,12 +38,19 @@ using ms_serving::PredictRequest; namespace mindspore { namespace serving { -using MSTensorPtr = std::shared_ptr<inference::MSTensor>; + +#define MSI_TIME_STAMP_START(name) auto time_start_##name = std::chrono::steady_clock::now(); +#define MSI_TIME_STAMP_END(name) \ + { \ + auto time_end_##name = std::chrono::steady_clock::now(); \ + auto time_cost = std::chrono::duration<double, std::milli>(time_end_##name - time_start_##name).count(); \ + MSI_LOG_INFO << #name " Time Cost " << time_cost << "ms ---------------------"; \ + } Status Session::CreatDeviceSession(const std::string &device, uint32_t device_id) { - session_ = inference::MSSession::CreateSession(device, device_id); + session_ = inference::InferSession::CreateSession(device, device_id); if (session_ == nullptr) { - MS_LOG(ERROR) << "Creat Session Failed"; + MSI_LOG(ERROR) << "Create Session Failed"; return FAILED; } device_type_ = device; @@ -55,53 +62,56 @@ Session &Session::Instance() { return instance; } -Status Session::Predict(const std::vector<MSTensorPtr> &inputs, inference::MultiTensor *outputs) { - if (last_graph_ == nullptr) { - MS_LOG(ERROR) << "the model has not loaded"; +Status Session::Predict(const PredictRequest &request, PredictReply &reply) { + if (!model_loaded_) { + MSI_LOG(ERROR) << "the model has not been loaded"; return FAILED; } if (session_ ==
nullptr) { - MS_LOG(ERROR) << "the inference session has not be initialized"; + MSI_LOG(ERROR) << "the inference session has not be initialized"; return FAILED; } std::lock_guard lock(mutex_); - MS_LOG(INFO) << "run Predict"; + MSI_LOG(INFO) << "run Predict"; - if (!session_->CheckModelInputs(graph_id_, inputs)) { - MS_LOG(ERROR) << "Input error."; + ServingRequest serving_request(request); + ServingReply serving_reply(reply); + + auto ret = session_->ExecuteModel(graph_id_, serving_request, serving_reply); + MSI_LOG(INFO) << "run Predict finished"; + if (!ret) { + MSI_LOG(ERROR) << "execute model return failed"; return FAILED; } - - *outputs = session_->RunGraph(graph_id_, inputs); - MS_LOG(INFO) << "run Predict finished"; return SUCCESS; } Status Session::Warmup(const MindSporeModelPtr model) { if (session_ == nullptr) { - MS_LOG(ERROR) << "The CreatDeviceSession should be called, before warmup"; + MSI_LOG(ERROR) << "The CreatDeviceSession should be called, before warmup"; return FAILED; } std::lock_guard lock(mutex_); - size_t size = 0; std::string file_name = model->GetModelPath() + '/' + model->GetModelName(); - char *graphBuf = ReadFile(file_name.c_str(), &size); - if (graphBuf == nullptr) { - MS_LOG(ERROR) << "Read model file failed, file name is " << file_name.c_str(); + model_loaded_ = false; + MSI_TIME_STAMP_START(LoadModelFromFile) + auto ret = session_->LoadModelFromFile(file_name, graph_id_); + MSI_TIME_STAMP_END(LoadModelFromFile) + if (!ret) { + MSI_LOG(ERROR) << "Load graph model failed, file name is " << file_name.c_str(); return FAILED; } - last_graph_ = inference::LoadModel(graphBuf, size, device_type_); - if (last_graph_ == nullptr) { - MS_LOG(ERROR) << "Load graph model failed, file name is " << file_name.c_str(); - return FAILED; - } - graph_id_ = session_->CompileGraph(last_graph_); - MS_LOG(INFO) << "Session Warmup finished"; + model_loaded_ = true; + MSI_LOG(INFO) << "Session Warmup finished"; return SUCCESS; } Status Session::Clear() { - session_ = nullptr; + if (session_ != nullptr) { + session_->UnloadModel(graph_id_); + session_->FinalizeEnv(); + session_ = nullptr; + } return SUCCESS; } @@ -109,121 +119,30 @@ namespace { static const uint32_t uint32max = 0x7FFFFFFF; std::promise exit_requested; -const std::map type2id_map{ - {ms_serving::MS_UNKNOWN, TypeId::kNumberTypeBegin}, {ms_serving::MS_BOOL, TypeId::kNumberTypeBool}, - {ms_serving::MS_INT8, TypeId::kNumberTypeInt8}, {ms_serving::MS_UINT8, TypeId::kNumberTypeUInt8}, - {ms_serving::MS_INT16, TypeId::kNumberTypeInt16}, {ms_serving::MS_UINT16, TypeId::kNumberTypeUInt16}, - {ms_serving::MS_INT32, TypeId::kNumberTypeInt32}, {ms_serving::MS_UINT32, TypeId::kNumberTypeUInt32}, - {ms_serving::MS_INT64, TypeId::kNumberTypeInt64}, {ms_serving::MS_UINT64, TypeId::kNumberTypeUInt64}, - {ms_serving::MS_FLOAT16, TypeId::kNumberTypeFloat16}, {ms_serving::MS_FLOAT32, TypeId::kNumberTypeFloat32}, - {ms_serving::MS_FLOAT64, TypeId::kNumberTypeFloat64}, -}; - -const std::map id2type_map{ - {TypeId::kNumberTypeBegin, ms_serving::MS_UNKNOWN}, {TypeId::kNumberTypeBool, ms_serving::MS_BOOL}, - {TypeId::kNumberTypeInt8, ms_serving::MS_INT8}, {TypeId::kNumberTypeUInt8, ms_serving::MS_UINT8}, - {TypeId::kNumberTypeInt16, ms_serving::MS_INT16}, {TypeId::kNumberTypeUInt16, ms_serving::MS_UINT16}, - {TypeId::kNumberTypeInt32, ms_serving::MS_INT32}, {TypeId::kNumberTypeUInt32, ms_serving::MS_UINT32}, - {TypeId::kNumberTypeInt64, ms_serving::MS_INT64}, {TypeId::kNumberTypeUInt64, ms_serving::MS_UINT64}, - {TypeId::kNumberTypeFloat16, 
ms_serving::MS_FLOAT16}, {TypeId::kNumberTypeFloat32, ms_serving::MS_FLOAT32}, - {TypeId::kNumberTypeFloat64, ms_serving::MS_FLOAT64}, -}; -const std::map length_map{ - {ms_serving::MS_UNKNOWN, 0}, - {ms_serving::MS_BOOL, sizeof(bool)}, - {ms_serving::MS_INT8, sizeof(int8_t)}, - {ms_serving::MS_UINT8, sizeof(uint8_t)}, - {ms_serving::MS_INT16, sizeof(int16_t)}, - {ms_serving::MS_UINT16, sizeof(uint16_t)}, - {ms_serving::MS_INT32, sizeof(int32_t)}, - {ms_serving::MS_UINT32, sizeof(uint32_t)}, - {ms_serving::MS_INT64, sizeof(int64_t)}, - {ms_serving::MS_UINT64, sizeof(uint64_t)}, - {ms_serving::MS_FLOAT16, 2}, - {ms_serving::MS_FLOAT32, 4}, - {ms_serving::MS_FLOAT64, 8}, -}; -MSTensorPtr ServingTensor2MSTensor(const ms_serving::Tensor &tensor) { - std::vector shape; - for (auto dim : tensor.tensor_shape().dims()) { - shape.push_back(static_cast(dim)); - } - auto iter = type2id_map.find(tensor.tensor_type()); - if (iter == type2id_map.end()) { - MS_LOG(ERROR) << "input tensor type is wrong, type is " << tensor.tensor_type(); - return nullptr; - } - TypeId type = iter->second; - auto ms_tensor = std::shared_ptr(inference::MSTensor::CreateTensor(type, shape)); - memcpy_s(ms_tensor->MutableData(), ms_tensor->Size(), tensor.data().data(), tensor.data().size()); - return ms_tensor; -} - -ms_serving::Tensor MSTensor2ServingTensor(MSTensorPtr ms_tensor) { - ms_serving::Tensor tensor; - ms_serving::TensorShape shape; - for (auto dim : ms_tensor->shape()) { - shape.add_dims(dim); - } - *tensor.mutable_tensor_shape() = shape; - auto iter = id2type_map.find(ms_tensor->data_type()); - if (iter == id2type_map.end()) { - MS_LOG(ERROR) << "input tensor type is wrong, type is " << tensor.tensor_type(); - return tensor; - } - tensor.set_tensor_type(iter->second); - tensor.set_data(ms_tensor->MutableData(), ms_tensor->Size()); - return tensor; -} - void ClearEnv() { Session::Instance().Clear(); - inference::ExitInference(); + // inference::ExitInference(); } void HandleSignal(int sig) { exit_requested.set_value(); } -#ifdef ENABLE_D -static rtContext_t g_ctx = nullptr; -#endif } // namespace // Service Implement class MSServiceImpl final : public MSService::Service { grpc::Status Predict(grpc::ServerContext *context, const PredictRequest *request, PredictReply *reply) override { std::lock_guard lock(mutex_); -#ifdef ENABLE_D - if (g_ctx == nullptr) { - MS_LOG(ERROR) << "rtCtx is nullptr"; - return grpc::Status::CANCELLED; - } - rtError_t rt_ret = rtCtxSetCurrent(g_ctx); - if (rt_ret != RT_ERROR_NONE) { - MS_LOG(ERROR) << "set Ascend rtCtx failed"; - } -#endif - std::vector inputs; - inference::MultiTensor outputs; - for (int i = 0; i < request->data_size(); i++) { - auto input = ServingTensor2MSTensor(request->data(i)); - if (input == nullptr) { - MS_LOG(ERROR) << "Tensor convert failed"; - return grpc::Status::CANCELLED; - } - inputs.push_back(input); - } - auto res = Session::Instance().Predict(inputs, &outputs); + MSI_TIME_STAMP_START(Predict) + auto res = Session::Instance().Predict(*request, *reply); + MSI_TIME_STAMP_END(Predict) if (res != SUCCESS) { return grpc::Status::CANCELLED; } - for (const auto &tensor : outputs) { - *reply->add_result() = MSTensor2ServingTensor(tensor); - } - MS_LOG(INFO) << "Finish call service Eval"; + MSI_LOG(INFO) << "Finish call service Eval"; return grpc::Status::OK; } grpc::Status Test(grpc::ServerContext *context, const PredictRequest *request, PredictReply *reply) override { - MS_LOG(INFO) << "TestService call"; + MSI_LOG(INFO) << "TestService call"; return 
grpc::Status::OK; } std::mutex mutex_; @@ -242,28 +161,17 @@ Status Server::BuildAndStart() { auto device_id = option_args->device_id; res = Session::Instance().CreatDeviceSession(device_type, device_id); if (res != SUCCESS) { - MS_LOG(ERROR) << "creat session failed"; + MSI_LOG(ERROR) << "creat session failed"; ClearEnv(); return res; } VersionController version_controller(option_args->poll_model_wait_seconds, model_path, model_name); res = version_controller.Run(); if (res != SUCCESS) { - MS_LOG(ERROR) << "load model failed"; + MSI_LOG(ERROR) << "load model failed"; ClearEnv(); return res; } -#ifdef ENABLE_D - // set d context - rtContext_t ctx = nullptr; - rtError_t rt_ret = rtCtxGetCurrent(&ctx); - if (rt_ret != RT_ERROR_NONE || ctx == nullptr) { - MS_LOG(ERROR) << "the ascend device context is null"; - ClearEnv(); - return FAILED; - } - g_ctx = ctx; -#endif MSServiceImpl ms_service; grpc::EnableDefaultHealthCheckService(true); grpc::reflection::InitProtoReflectionServerBuilderPlugin(); @@ -276,13 +184,13 @@ Status Server::BuildAndStart() { serverBuilder.RegisterService(&ms_service); std::unique_ptr server(serverBuilder.BuildAndStart()); if (server == nullptr) { - MS_LOG(ERROR) << "The serving server create failed"; + MSI_LOG(ERROR) << "The serving server create failed"; ClearEnv(); return FAILED; } auto grpc_server_run = [&server]() { server->Wait(); }; std::thread serving_thread(grpc_server_run); - MS_LOG(INFO) << "MS Serving listening on " << server_address; + MSI_LOG(INFO) << "MS Serving listening on " << server_address; auto exit_future = exit_requested.get_future(); exit_future.wait(); ClearEnv(); diff --git a/serving/core/server.h b/serving/core/server.h index f1927e9946..3f8b1da066 100644 --- a/serving/core/server.h +++ b/serving/core/server.h @@ -23,14 +23,21 @@ #include "util/status.h" #include "version_control/model.h" #include "include/inference.h" -#include "mindspore/ccsrc/debug/info.h" +#include "serving/ms_service.pb.h" +#include "serving/ms_service.grpc.pb.h" + namespace mindspore { namespace serving { + +using ms_serving::PredictReply; +using ms_serving::PredictRequest; + class Session { public: static Session &Instance(); Status CreatDeviceSession(const std::string &device, uint32_t device_id); - Status Predict(const std::vector> &inputs, inference::MultiTensor *output); + // Status Predict(const inference::MultiTensor &inputs, inference::MultiTensor &output); + Status Predict(const PredictRequest &request, PredictReply &reply); Status Warmup(const MindSporeModelPtr model); Status Clear(); @@ -38,8 +45,8 @@ class Session { Session() = default; ~Session() = default; int sesseion_id_{0}; - std::shared_ptr session_{nullptr}; - FuncGraphPtr last_graph_{nullptr}; + std::shared_ptr session_{nullptr}; + bool model_loaded_ = false; uint32_t graph_id_{0}; std::mutex mutex_; std::string device_type_; diff --git a/serving/core/serving_tensor.cc b/serving/core/serving_tensor.cc new file mode 100644 index 0000000000..4fe43cdf4c --- /dev/null +++ b/serving/core/serving_tensor.cc @@ -0,0 +1,164 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "core/serving_tensor.h" +#include +#include +#include +#include +#include "include/infer_log.h" + +using std::string; +using std::unordered_map; +using std::vector; + +namespace mindspore { +namespace serving { + +using inference::DataType; +using inference::InferTensorBase; + +const size_t kMaxShapeElementCount = INT32_MAX; +const size_t kMaxDataBufferSize = UINT32_MAX; + +ServingTensor::ServingTensor(ms_serving::Tensor &other) : tensor_(other) {} + +ServingTensor::~ServingTensor() {} + +DataType ServingTensor::data_type() const { + const std::unordered_map type2id_map{ + {ms_serving::MS_UNKNOWN, inference::kMSI_Unknown}, {ms_serving::MS_BOOL, inference::kMSI_Bool}, + {ms_serving::MS_INT8, inference::kMSI_Int8}, {ms_serving::MS_UINT8, inference::kMSI_Uint8}, + {ms_serving::MS_INT16, inference::kMSI_Int16}, {ms_serving::MS_UINT16, inference::kMSI_Uint16}, + {ms_serving::MS_INT32, inference::kMSI_Int32}, {ms_serving::MS_UINT32, inference::kMSI_Uint32}, + {ms_serving::MS_INT64, inference::kMSI_Int64}, {ms_serving::MS_UINT64, inference::kMSI_Uint64}, + {ms_serving::MS_FLOAT16, inference::kMSI_Float16}, {ms_serving::MS_FLOAT32, inference::kMSI_Float32}, + {ms_serving::MS_FLOAT64, inference::kMSI_Float64}, + }; + auto it = type2id_map.find(tensor_.tensor_type()); + if (it == type2id_map.end()) { + MSI_LOG_WARNING << "failed to get data type, undefined data type " << tensor_.tensor_type(); + return inference::kMSI_Unknown; + } else { + return it->second; + } +} + +void ServingTensor::set_data_type(DataType data_type) { + const std::unordered_map id2type_map{ + {inference::kMSI_Unknown, ms_serving::MS_UNKNOWN}, {inference::kMSI_Bool, ms_serving::MS_BOOL}, + {inference::kMSI_Float64, ms_serving::MS_FLOAT64}, {inference::kMSI_Int8, ms_serving::MS_INT8}, + {inference::kMSI_Uint8, ms_serving::MS_UINT8}, {inference::kMSI_Int16, ms_serving::MS_INT16}, + {inference::kMSI_Uint16, ms_serving::MS_UINT16}, {inference::kMSI_Int32, ms_serving::MS_INT32}, + {inference::kMSI_Uint32, ms_serving::MS_UINT32}, {inference::kMSI_Int64, ms_serving::MS_INT64}, + {inference::kMSI_Uint64, ms_serving::MS_UINT64}, {inference::kMSI_Float16, ms_serving::MS_FLOAT16}, + {inference::kMSI_Float32, ms_serving::MS_FLOAT32}, + }; + auto it = id2type_map.find(data_type); + if (it == id2type_map.end()) { + MSI_LOG_WARNING << "failed to set data type, undefined data type " << data_type; + tensor_.set_tensor_type(ms_serving::MS_UNKNOWN); + } else { + tensor_.set_tensor_type(it->second); + } +} + +std::vector ServingTensor::shape() const { + std::vector result; + auto dims = tensor_.tensor_shape().dims(); + std::transform(dims.begin(), dims.end(), std::back_inserter(result), [](const int64_t dim) { return dim; }); + return result; +} + +void ServingTensor::set_shape(const std::vector &shape) { + auto tensor_shape = tensor_.mutable_tensor_shape(); + tensor_shape->Clear(); + size_t element_count = 1; + for (auto dim : shape) { + if (dim <= 0 || element_count > kMaxShapeElementCount / dim) { + MSI_LOG_ERROR << "failed to set shape, invalid dim num " << dim; + tensor_shape->Clear(); + return; + } + 
element_count *= dim; + tensor_shape->add_dims(dim); + } +} + +bool ServingTensor::resize_data(size_t data_len) { + string *buffer = tensor_.mutable_data(); + if (buffer == nullptr) { + MSI_LOG_ERROR << "invalid buffer data"; + return false; + } + buffer->resize(data_len); + return true; +} + +size_t ServingTensor::data_size() const { return tensor_.data().size(); } + +void *ServingTensor::mutable_data() { return const_cast(tensor_.mutable_data()->data()); } + +const void *ServingTensor::data() const { return tensor_.data().data(); } + +ServingRequest::ServingRequest(const ms_serving::PredictRequest &request) : request_(request) { + auto &data = request_.data(); + std::transform(data.begin(), data.end(), std::back_inserter(cache_), + [](const ms_serving::Tensor &item) { return ServingTensor(const_cast(item)); }); +} + +size_t ServingRequest::size() const { return request_.data_size(); } + +const InferTensorBase *ServingRequest::operator[](size_t index) const { + if (index >= cache_.size()) { + MSI_LOG_ERROR << "visit invalid index " << index << " total size " << cache_.size(); + return nullptr; + } + return &(cache_[index]); +} + +size_t ServingReply::size() const { return cache_.size(); } + +InferTensorBase *ServingReply::operator[](size_t index) { + if (index >= cache_.size()) { + MSI_LOG_ERROR << "visit invalid index " << index << " total size " << cache_.size(); + return nullptr; + } + return &(cache_[index]); +} + +const InferTensorBase *ServingReply::operator[](size_t index) const { + if (index >= cache_.size()) { + MSI_LOG_ERROR << "visit invalid index " << index << " total size " << cache_.size(); + return nullptr; + } + return &(cache_[index]); +} + +InferTensorBase *ServingReply::add() { + auto new_item = reply_.add_result(); + if (new_item == nullptr) { + MSI_LOG_ERROR << "add new item failed, current total size " << cache_.size(); + return nullptr; + } + cache_.push_back(ServingTensor(*new_item)); + return &(cache_.back()); +} + +void ServingReply::clear() { reply_.mutable_result()->Clear(); } + +} // namespace serving +} // namespace mindspore diff --git a/serving/core/serving_tensor.h b/serving/core/serving_tensor.h new file mode 100644 index 0000000000..fbbc1e5040 --- /dev/null +++ b/serving/core/serving_tensor.h @@ -0,0 +1,79 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef MINDSPORE_SERVING_TENSOR_H_ +#define MINDSPORE_SERVING_TENSOR_H_ + +#include +#include +#include +#include "include/infer_tensor.h" +#include "serving/ms_service.pb.h" + +namespace mindspore { +namespace serving { + +class MS_API ServingTensor : public inference::InferTensorBase { + public: + // the other's lifetime must longer than this object + explicit ServingTensor(ms_serving::Tensor &other); + ~ServingTensor(); + + inference::DataType data_type() const override; + void set_data_type(inference::DataType type) override; + std::vector shape() const override; + void set_shape(const std::vector &shape) override; + const void *data() const override; + size_t data_size() const override; + bool resize_data(size_t data_len) override; + void *mutable_data() override; + + private: + // if tensor_ is reference from other ms_serving::Tensor, the other's lifetime must + // longer than this object + ms_serving::Tensor &tensor_; +}; + +class ServingRequest : public inference::RequestBase { + public: + explicit ServingRequest(const ms_serving::PredictRequest &request); + + size_t size() const override; + const inference::InferTensorBase *operator[](size_t index) const override; + + private: + const ms_serving::PredictRequest &request_; + std::vector cache_; +}; + +class ServingReply : public inference::ReplyBase { + public: + explicit ServingReply(ms_serving::PredictReply &reply) : reply_(reply) {} + + size_t size() const override; + inference::InferTensorBase *operator[](size_t index) override; + const inference::InferTensorBase *operator[](size_t index) const override; + inference::InferTensorBase *add() override; + void clear() override; + + private: + ms_serving::PredictReply &reply_; + std::vector cache_; +}; + +} // namespace serving +} // namespace mindspore +#endif // MINDSPORE_SERVING_TENSOR_H_ diff --git a/serving/core/util/file_system_operation.cc b/serving/core/util/file_system_operation.cc index 1af512a54c..66bbde3414 100644 --- a/serving/core/util/file_system_operation.cc +++ b/serving/core/util/file_system_operation.cc @@ -25,43 +25,10 @@ #include #include #include -#include "mindspore/ccsrc/utils/log_adapter.h" +#include "include/infer_log.h" namespace mindspore { namespace serving { -char *ReadFile(const char *file, size_t *size) { - if (file == nullptr) { - MS_LOG(ERROR) << "file is nullptr"; - return nullptr; - } - MS_ASSERT(size != nullptr); - std::string realPath = file; - std::ifstream ifs(realPath); - if (!ifs.good()) { - MS_LOG(ERROR) << "file: " << realPath << " is not exist"; - return nullptr; - } - - if (!ifs.is_open()) { - MS_LOG(ERROR) << "file: " << realPath << "open failed"; - return nullptr; - } - - ifs.seekg(0, std::ios::end); - *size = ifs.tellg(); - std::unique_ptr buf(new (std::nothrow) char[*size]); - if (buf == nullptr) { - MS_LOG(ERROR) << "malloc buf failed, file: " << realPath; - ifs.close(); - return nullptr; - } - - ifs.seekg(0, std::ios::beg); - ifs.read(buf.get(), *size); - ifs.close(); - - return buf.release(); -} bool DirOrFileExist(const std::string &file_path) { int ret = access(file_path.c_str(), 0); @@ -74,7 +41,7 @@ std::vector GetAllSubDirs(const std::string &dir_path) { std::vector SubDirs; if ((dir = opendir(dir_path.c_str())) == NULL) { - MS_LOG(ERROR) << "Open " << dir_path << " error!"; + MSI_LOG(ERROR) << "Open " << dir_path << " error!"; return std::vector(); } diff --git a/serving/core/util/option_parser.cc b/serving/core/util/option_parser.cc index c7f00e3733..df2047c30f 100644 --- a/serving/core/util/option_parser.cc +++ 
b/serving/core/util/option_parser.cc @@ -19,10 +19,11 @@ #include #include #include -#include "mindspore/ccsrc/utils/log_adapter.h" +#include "include/infer_log.h" namespace mindspore { namespace serving { + bool StartWith(const std::string &str, const std::string &expected) { return expected.empty() || (str.size() >= expected.size() && memcmp(str.data(), expected.data(), expected.size()) == 0); diff --git a/serving/core/version_control/model.cc b/serving/core/version_control/model.cc index 8e3942b926..a656e9af91 100644 --- a/serving/core/version_control/model.cc +++ b/serving/core/version_control/model.cc @@ -15,18 +15,19 @@ */ #include "core/version_control/model.h" #include -#include "mindspore/ccsrc/utils/log_adapter.h" +#include "include/infer_log.h" namespace mindspore { namespace serving { + MindSporeModel::MindSporeModel(const std::string &model_name, const std::string &model_path, const std::string &model_version, const time_t &last_update_time) : model_name_(model_name), model_path_(model_path), model_version_(model_version), last_update_time_(last_update_time) { - MS_LOG(INFO) << "init mindspore model, model_name = " << model_name_ << ", model_path = " << model_path_ - << ", model_version = " << model_version_ << ", last_update_time = " << last_update_time_; + MSI_LOG(INFO) << "init mindspore model, model_name = " << model_name_ << ", model_path = " << model_path_ + << ", model_version = " << model_version_ << ", last_update_time = " << last_update_time_; } } // namespace serving } // namespace mindspore diff --git a/serving/core/version_control/version_controller.cc b/serving/core/version_control/version_controller.cc index 71aba923d5..1dc6b1b2bb 100644 --- a/serving/core/version_control/version_controller.cc +++ b/serving/core/version_control/version_controller.cc @@ -20,11 +20,12 @@ #include #include #include "util/file_system_operation.h" -#include "mindspore/ccsrc/utils/log_adapter.h" +#include "include/infer_log.h" #include "core/server.h" namespace mindspore { namespace serving { + volatile bool stop_poll = false; std::string GetVersionFromPath(const std::string &path) { @@ -96,7 +97,7 @@ Status VersionController::Run() { Status VersionController::CreateInitModels() { if (!DirOrFileExist(models_path_)) { - MS_LOG(ERROR) << "Model Path Not Exist!" << std::endl; + MSI_LOG(ERROR) << "Model Path Not Exist!" << std::endl; return FAILED; } std::vector SubDirs = GetAllSubDirs(models_path_); @@ -115,7 +116,7 @@ Status VersionController::CreateInitModels() { } } if (valid_models_.empty()) { - MS_LOG(ERROR) << "There is no valid model for serving"; + MSI_LOG(ERROR) << "There is no valid model for serving"; return FAILED; } auto ret = Session::Instance().Warmup(valid_models_.back()); diff --git a/serving/cpp_example/CMakeLists.txt b/serving/cpp_example/CMakeLists.txt index aaf0277880..bcb6ab0462 100644 --- a/serving/cpp_example/CMakeLists.txt +++ b/serving/cpp_example/CMakeLists.txt @@ -8,33 +8,33 @@ add_compile_definitions(_GLIBCXX_USE_CXX11_ABI=0) find_package(Threads REQUIRED) # This branch assumes that gRPC and all its dependencies are already installed - # on this system, so they can be located by find_package(). +# on this system, so they can be located by find_package(). - # Find Protobuf installation - # Looks for protobuf-config.cmake file installed by Protobuf's cmake installation. 
- set(protobuf_MODULE_COMPATIBLE TRUE) - find_package(Protobuf CONFIG REQUIRED) - message(STATUS "Using protobuf ${protobuf_VERSION}") +# Find Protobuf installation +# Looks for protobuf-config.cmake file installed by Protobuf's cmake installation. +set(protobuf_MODULE_COMPATIBLE TRUE) +find_package(Protobuf CONFIG REQUIRED) +message(STATUS "Using protobuf ${protobuf_VERSION}") - set(_PROTOBUF_LIBPROTOBUF protobuf::libprotobuf) - set(_REFLECTION gRPC::grpc++_reflection) - if(CMAKE_CROSSCOMPILING) - find_program(_PROTOBUF_PROTOC protoc) - else() - set(_PROTOBUF_PROTOC $) - endif() +set(_PROTOBUF_LIBPROTOBUF protobuf::libprotobuf) +set(_REFLECTION gRPC::grpc++_reflection) +if (CMAKE_CROSSCOMPILING) + find_program(_PROTOBUF_PROTOC protoc) +else () + set(_PROTOBUF_PROTOC $) +endif () - # Find gRPC installation - # Looks for gRPCConfig.cmake file installed by gRPC's cmake installation. - find_package(gRPC CONFIG REQUIRED) - message(STATUS "Using gRPC ${gRPC_VERSION}") +# Find gRPC installation +# Looks for gRPCConfig.cmake file installed by gRPC's cmake installation. +find_package(gRPC CONFIG REQUIRED) +message(STATUS "Using gRPC ${gRPC_VERSION}") - set(_GRPC_GRPCPP gRPC::grpc++) - if(CMAKE_CROSSCOMPILING) - find_program(_GRPC_CPP_PLUGIN_EXECUTABLE grpc_cpp_plugin) - else() - set(_GRPC_CPP_PLUGIN_EXECUTABLE $) - endif() +set(_GRPC_GRPCPP gRPC::grpc++) +if (CMAKE_CROSSCOMPILING) + find_program(_GRPC_CPP_PLUGIN_EXECUTABLE grpc_cpp_plugin) +else () + set(_GRPC_CPP_PLUGIN_EXECUTABLE $) +endif () # Proto file get_filename_component(hw_proto "../ms_service.proto" ABSOLUTE) @@ -59,7 +59,7 @@ add_custom_command( include_directories("${CMAKE_CURRENT_BINARY_DIR}") # Targets greeter_[async_](client|server) -foreach(_target +foreach (_target ms_client ms_server) add_executable(${_target} "${_target}.cc" ${hw_proto_srcs} @@ -68,4 +68,4 @@ foreach(_target ${_REFLECTION} ${_GRPC_GRPCPP} ${_PROTOBUF_LIBPROTOBUF}) -endforeach() +endforeach () diff --git a/serving/cpp_example/ms_client.cc b/serving/cpp_example/ms_client.cc index 3a9cac77e4..3be4a7798d 100644 --- a/serving/cpp_example/ms_client.cc +++ b/serving/cpp_example/ms_client.cc @@ -211,12 +211,77 @@ PredictRequest ReadBertInput() { return request; } +PredictRequest ReadLenetInput() { + size_t size; + auto buf = ReadFile("lenet_img.bin", &size); + if (buf == nullptr) { + std::cout << "read file failed" << std::endl; + return PredictRequest(); + } + PredictRequest request; + auto cur = buf; + if (size > 0) { + Tensor data; + TensorShape shape; + // set type + data.set_tensor_type(ms_serving::MS_FLOAT32); + + // set shape + shape.add_dims(size / sizeof(float)); + *data.mutable_tensor_shape() = shape; + + // set data + data.set_data(cur, size); + *request.add_data() = data; + } + std::cout << "get input data size " << size << std::endl; + return request; +} + +PredictRequest ReadOtherInput(const std::string &data_file) { + size_t size; + auto buf = ReadFile(data_file.c_str(), &size); + if (buf == nullptr) { + std::cout << "read file failed" << std::endl; + return PredictRequest(); + } + PredictRequest request; + auto cur = buf; + if (size > 0) { + Tensor data; + TensorShape shape; + // set type + data.set_tensor_type(ms_serving::MS_FLOAT32); + + // set shape + shape.add_dims(size / sizeof(float)); + *data.mutable_tensor_shape() = shape; + + // set data + data.set_data(cur, size); + *request.add_data() = data; + } + std::cout << "get input data size " << size << std::endl; + return request; +} + +template +void print_array_item(const DT *data, size_t size) { + 
for (size_t i = 0; i < size && i < 100; i++) { + std::cout << data[i] << '\t'; + if ((i + 1) % 10 == 0) { + std::cout << std::endl; + } + } + std::cout << std::endl; +} + class MSClient { public: explicit MSClient(std::shared_ptr channel) : stub_(MSService::NewStub(channel)) {} ~MSClient() = default; - std::string Predict(const std::string &type) { + std::string Predict(const std::string &type, const std::string &data_file) { // Data we are sending to the server. PredictRequest request; if (type == "add") { @@ -234,6 +299,10 @@ class MSClient { *request.add_data() = data; } else if (type == "bert") { request = ReadBertInput(); + } else if (type == "lenet") { + request = ReadLenetInput(); + } else if (type == "other") { + request = ReadOtherInput(data_file); } else { std::cout << "type only support bert or add, but input is " << type << std::endl; } @@ -256,6 +325,20 @@ class MSClient { // Act upon its status. if (status.ok()) { + for (size_t i = 0; i < reply.result_size(); i++) { + auto result = reply.result(i); + if (result.tensor_type() == ms_serving::DataType::MS_FLOAT32) { + print_array_item(reinterpret_cast(result.data().data()), result.data().size() / sizeof(float)); + } else if (result.tensor_type() == ms_serving::DataType::MS_INT32) { + print_array_item(reinterpret_cast(result.data().data()), + result.data().size() / sizeof(int32_t)); + } else if (result.tensor_type() == ms_serving::DataType::MS_UINT32) { + print_array_item(reinterpret_cast(result.data().data()), + result.data().size() / sizeof(uint32_t)); + } else { + std::cout << "output datatype " << result.tensor_type() << std::endl; + } + } return "RPC OK"; } else { std::cout << status.error_code() << ": " << status.error_message() << std::endl; @@ -277,6 +360,8 @@ int main(int argc, char **argv) { std::string arg_target_str("--target"); std::string type; std::string arg_type_str("--type"); + std::string arg_data_str("--data"); + std::string data = "default_data.bin"; if (argc > 2) { { // parse target @@ -304,19 +389,33 @@ int main(int argc, char **argv) { if (arg_val2[start_pos] == '=') { type = arg_val2.substr(start_pos + 1); } else { - std::cout << "The only correct argument syntax is --target=" << std::endl; + std::cout << "The only correct argument syntax is --type=" << std::endl; return 0; } } else { type = "add"; } } + if (argc > 3) { + // parse type + std::string arg_val3 = argv[3]; + size_t start_pos = arg_val3.find(arg_data_str); + if (start_pos != std::string::npos) { + start_pos += arg_data_str.size(); + if (arg_val3[start_pos] == '=') { + data = arg_val3.substr(start_pos + 1); + } else { + std::cout << "The only correct argument syntax is --data=" << std::endl; + return 0; + } + } + } } else { target_str = "localhost:5500"; type = "add"; } MSClient client(grpc::CreateChannel(target_str, grpc::InsecureChannelCredentials())); - std::string reply = client.Predict(type); + std::string reply = client.Predict(type, data); std::cout << "client received: " << reply << std::endl; return 0;