From f33ea707cd3c06fc715efbb06b48ccbbc642bd2a Mon Sep 17 00:00:00 2001 From: xuyongfei Date: Thu, 11 Aug 2022 17:41:15 +0800 Subject: [PATCH] Cloud inference tensor opt --- include/api/types.h | 47 +++- mindspore/ccsrc/cxx_api/types.cc | 24 +- mindspore/core/ir/tensor.cc | 6 +- mindspore/core/ir/tensor.h | 12 + mindspore/lite/CMakeLists.txt | 16 +- .../lite/src/common/mutable_tensor_impl.h | 44 +++ .../src/extendrt/cxx_api/model/model_impl.cc | 92 +++---- .../cxx_api/model_pool/model_worker.cc | 14 +- mindspore/lite/src/extendrt/cxx_api/types.cc | 158 ++++------- .../distribution/distribution_base.cc | 5 +- .../extendrt/delegate/tensorrt/tensor_info.cc | 37 ++- .../delegate/tensorrt/tensorrt_allocator.cc | 27 ++ .../delegate/tensorrt/tensorrt_allocator.h | 1 + .../tensorrt/tensorrt_graph_executor.cc | 68 ++--- .../tensorrt/tensorrt_graph_executor.h | 1 - .../delegate/tensorrt/tensorrt_subgraph.cc | 251 +++++++++++------- .../delegate/tensorrt/tensorrt_subgraph.h | 8 +- mindspore/lite/src/extendrt/infer_session.cc | 21 +- mindspore/lite/src/extendrt/infer_session.h | 12 +- .../src/extendrt/session/delegate_session.cc | 11 +- .../src/extendrt/session/delegate_session.h | 10 +- .../session/graph_executor_session.cc | 54 ++-- .../extendrt/session/graph_executor_session.h | 14 +- .../extendrt/session/lite_infer_session.cc | 59 ++-- .../src/extendrt/session/lite_infer_session.h | 14 +- .../lite/src/extendrt/single_op_session.cc | 43 ++- .../lite/src/extendrt/single_op_session.h | 14 +- .../src/extendrt/utils/kernel_graph_utils.cc | 9 +- .../lite/src/extendrt/utils/runtime_utils.cc | 13 +- .../lite/src/extendrt/utils/runtime_utils.h | 4 +- .../src/extendrt/utils/tensor_default_impl.h | 141 ++++++++++ .../lite/src/extendrt/utils/tensor_utils.cc | 96 ++++++- .../lite/src/extendrt/utils/tensor_utils.h | 153 +++++++++++ mindspore/lite/src/litert/c_api/tensor_c.cc | 4 +- .../src/litert/cxx_api/tensor/tensor_impl.cc | 6 + .../src/litert/cxx_api/tensor/tensor_impl.h | 30 ++- mindspore/lite/src/litert/cxx_api/types.cc | 52 ++-- mindspore/lite/src/tensor.h | 9 +- .../tools/benchmark/benchmark_unified_api.cc | 4 +- .../tools/converter/registry/CMakeLists.txt | 1 + tests/st/cpp/model/test_zero_copy.cc | 8 +- 41 files changed, 1080 insertions(+), 513 deletions(-) create mode 100644 mindspore/lite/src/common/mutable_tensor_impl.h create mode 100644 mindspore/lite/src/extendrt/utils/tensor_default_impl.h diff --git a/include/api/types.h b/include/api/types.h index 20ad30f6d8f..2aacb8f9e8a 100644 --- a/include/api/types.h +++ b/include/api/types.h @@ -81,10 +81,11 @@ class MS_API MSTensor { /// \param[in] shape The shape of the MSTensor. /// \param[in] data The data pointer that points to allocated memory. /// \param[in] data_len The length of the memory, in bytes. + /// \param[in] own_data Whether the data memory should be freed in MSTensor destruction. /// /// \return A pointer of MSTensor. static inline MSTensor *CreateRefTensor(const std::string &name, DataType type, const std::vector &shape, - const void *data, size_t data_len) noexcept; + const void *data, size_t data_len, bool own_data = true) noexcept; /// \brief Creates a MSTensor object, whose device data can be directly accessed by Model, must be used in pairs with /// DestroyTensorPtr. @@ -96,8 +97,8 @@ class MS_API MSTensor { /// \param[in] data_len The length of the memory, in bytes. /// /// \return A pointer of MSTensor. 
-  static inline MSTensor *CreateDevTensor(const std::string &name, DataType type, const std::vector<int64_t> &shape,
-                                          const void *data, size_t data_len) noexcept;
+  static inline MSTensor CreateDeviceTensor(const std::string &name, DataType type, const std::vector<int64_t> &shape,
+                                            void *data, size_t data_len) noexcept;
 
   /// \brief Creates a MSTensor object from local file, must be used in pairs with DestroyTensorPtr.
   ///
@@ -125,7 +126,7 @@
   /// \return A vector container containing several strings.
   static inline std::vector<std::string> TensorToStrings(const MSTensor &tensor);
 
-  /// \brief Destroy an object created by Clone, StringsToTensor, CreateRefTensor, CreateDevTensor or CreateTensor. Do
+  /// \brief Destroy an object created by Clone, StringsToTensor, CreateRefTensor or CreateTensor. Do
   /// not use it to destroy MSTensor from other sources.
   ///
   /// \param[in] tensor A MSTensor object.
@@ -207,6 +208,13 @@
   /// \return The boolean value that indicates whether the MSTensor equals tensor.
   bool operator==(const MSTensor &tensor) const;
 
+  /// \brief Get the boolean value that indicates whether the MSTensor does not equal tensor.
+  ///
+  /// \param[in] tensor Another MSTensor.
+  ///
+  /// \return The boolean value that indicates whether the MSTensor does not equal tensor.
+  bool operator!=(const MSTensor &tensor) const;
+
   /// \brief Set the shape of the MSTensor. Only valid for Lite.
   ///
   /// \param[in] shape Shape of the MSTensor, a vector of int64_t.
@@ -251,7 +259,20 @@
   /// \note The memory pointed to by the origin data pointer of the MSTensor needs to be managed by the user.
   ///
   /// \param[in] data A pointer to the data of the MSTensor.
-  void SetData(void *data);
+  /// \param[in] own_data Whether the data memory should be freed in MSTensor destruction.
+  void SetData(void *data, bool own_data = true);
+
+  /// \brief Set the device data address for the MSTensor. Only valid for Lite.
+  ///
+  /// \note The memory pointed to by the device data pointer of the MSTensor needs to be managed by the user.
+  ///
+  /// \param[in] data A pointer to the device data of the MSTensor.
+  void SetDeviceData(void *data);
+
+  /// \brief Get the device data address of the MSTensor set by SetDeviceData. Only valid for Lite.
+  ///
+  /// \return A pointer to the device data of the MSTensor.
+  void *GetDeviceData();
 
   /// \brief Get the quantization parameters of the MSTensor. Only valid for Lite.
/// @@ -270,9 +291,9 @@ class MS_API MSTensor { static MSTensor *CreateTensor(const std::vector &name, enum DataType type, const std::vector &shape, const void *data, size_t data_len) noexcept; static MSTensor *CreateRefTensor(const std::vector &name, enum DataType type, const std::vector &shape, - const void *data, size_t data_len) noexcept; - static MSTensor *CreateDevTensor(const std::vector &name, enum DataType type, const std::vector &shape, - const void *data, size_t data_len) noexcept; + const void *data, size_t data_len, bool own_data) noexcept; + static MSTensor CreateDeviceTensor(const std::vector &name, enum DataType type, + const std::vector &shape, void *data, size_t data_len) noexcept; static MSTensor *CreateTensorFromFile(const std::vector &file, enum DataType type, const std::vector &shape) noexcept; static MSTensor *CharStringsToTensor(const std::vector &name, const std::vector> &str); @@ -313,13 +334,13 @@ MSTensor *MSTensor::CreateTensor(const std::string &name, enum DataType type, co } MSTensor *MSTensor::CreateRefTensor(const std::string &name, enum DataType type, const std::vector &shape, - const void *data, size_t data_len) noexcept { - return CreateRefTensor(StringToChar(name), type, shape, data, data_len); + const void *data, size_t data_len, bool own_data) noexcept { + return CreateRefTensor(StringToChar(name), type, shape, data, data_len, own_data); } -MSTensor *MSTensor::CreateDevTensor(const std::string &name, enum DataType type, const std::vector &shape, - const void *data, size_t data_len) noexcept { - return CreateDevTensor(StringToChar(name), type, shape, data, data_len); +MSTensor MSTensor::CreateDeviceTensor(const std::string &name, enum DataType type, const std::vector &shape, + void *data, size_t data_len) noexcept { + return CreateDeviceTensor(StringToChar(name), type, shape, data, data_len); } MSTensor *MSTensor::CreateTensorFromFile(const std::string &file, enum DataType type, diff --git a/mindspore/ccsrc/cxx_api/types.cc b/mindspore/ccsrc/cxx_api/types.cc index 43855800338..a3e357cc959 100644 --- a/mindspore/ccsrc/cxx_api/types.cc +++ b/mindspore/ccsrc/cxx_api/types.cc @@ -155,7 +155,8 @@ MSTensor *MSTensor::CreateTensor(const std::vector &name, enum DataType ty } MSTensor *MSTensor::CreateRefTensor(const std::vector &name, enum DataType type, - const std::vector &shape, const void *data, size_t data_len) noexcept { + const std::vector &shape, const void *data, size_t data_len, + bool) noexcept { std::string name_str = CharToString(name); try { std::shared_ptr impl = std::make_shared(name_str, type, shape, data, data_len, false); @@ -170,19 +171,18 @@ MSTensor *MSTensor::CreateRefTensor(const std::vector &name, enum DataType } } -MSTensor *MSTensor::CreateDevTensor(const std::vector &name, enum DataType type, - const std::vector &shape, const void *data, size_t data_len) noexcept { +MSTensor MSTensor::CreateDeviceTensor(const std::vector &name, enum DataType type, + const std::vector &shape, void *data, size_t data_len) noexcept { std::string name_str = CharToString(name); try { std::shared_ptr impl = std::make_shared(name_str, type, shape, data, data_len, true); - MSTensor *ret = new MSTensor(impl); - return ret; + return MSTensor(impl); } catch (const std::bad_alloc &) { MS_LOG(ERROR) << "Malloc memory failed."; - return nullptr; + return MSTensor(nullptr); } catch (...) 
{ MS_LOG(ERROR) << "Unknown error occurred."; - return nullptr; + return MSTensor(nullptr); } } @@ -382,6 +382,10 @@ bool MSTensor::operator==(std::nullptr_t) const { return impl_ == nullptr; } bool MSTensor::operator!=(std::nullptr_t) const { return impl_ != nullptr; } +bool MSTensor::operator==(const MSTensor &tensor) const { return impl_ == tensor.impl_; } + +bool MSTensor::operator!=(const MSTensor &tensor) const { return impl_ != tensor.impl_; } + MSTensor *MSTensor::Clone() const { MS_EXCEPTION_IF_NULL(impl_); try { @@ -456,7 +460,11 @@ void MSTensor::SetFormat(mindspore::Format) { MS_LOG_EXCEPTION << "Invalid imple mindspore::Format MSTensor::format() const { MS_LOG_EXCEPTION << "Invalid implement."; } -void MSTensor::SetData(void *) { MS_LOG_EXCEPTION << "Invalid implement."; } +void MSTensor::SetData(void *, bool) { MS_LOG_EXCEPTION << "Invalid implement."; } + +void MSTensor::SetDeviceData(void *) { MS_LOG_EXCEPTION << "Invalid implement."; } + +void *MSTensor::GetDeviceData() { MS_LOG_EXCEPTION << "Invalid implement."; } std::vector MSTensor::QuantParams() const { MS_LOG_EXCEPTION << "Invalid implement."; } diff --git a/mindspore/core/ir/tensor.cc b/mindspore/core/ir/tensor.cc index 6979442555f..c9e2c5f8e83 100644 --- a/mindspore/core/ir/tensor.cc +++ b/mindspore/core/ir/tensor.cc @@ -628,7 +628,8 @@ Tensor::Tensor(const Tensor &tensor) device_event_(tensor.device_event_), lazy_callback_(tensor.lazy_callback_), user_data_(tensor.user_data_), - compression_type_(tensor.compression_type_) {} + compression_type_(tensor.compression_type_), + tensor_name_(tensor.tensor_name_) {} Tensor::Tensor(const Tensor &tensor, TypeId data_type) : MetaTensor(data_type, tensor.shape_), @@ -649,7 +650,8 @@ Tensor::Tensor(const Tensor &tensor, TypeId data_type) device_event_(tensor.device_event_), lazy_callback_(tensor.lazy_callback_), user_data_(tensor.user_data_), - compression_type_(tensor.compression_type_) {} + compression_type_(tensor.compression_type_), + tensor_name_(tensor.tensor_name_) {} Tensor::Tensor(TypeId data_type, const ShapeVector &shape, TensorDataPtr data) : MetaTensor(data_type, shape), data_(std::move(data)), id_(MakeId()) {} diff --git a/mindspore/core/ir/tensor.h b/mindspore/core/ir/tensor.h index ce015856bf3..c00b249efa7 100644 --- a/mindspore/core/ir/tensor.h +++ b/mindspore/core/ir/tensor.h @@ -669,6 +669,16 @@ class MS_CORE_API Tensor final : public MetaTensor { /// \return tensor compression type. TensorCompressionType compression_type() const { return compression_type_; } + /// \brief Set tensor name. + /// + /// \param[in] tensor_name The tensor name. + void set_name(const std::string &tensor_name) { tensor_name_ = tensor_name; } + + /// \brief Get the tensor name. + /// + /// \return tensor name. 
+ const std::string &name() const { return tensor_name_; } + private: void ExecuteLazyTask() const; @@ -695,6 +705,8 @@ class MS_CORE_API Tensor final : public MetaTensor { std::function lazy_callback_{nullptr}; UserData user_data_; TensorCompressionType compression_type_{kNoCompression}; + + std::string tensor_name_; }; // CSRTensor entity class diff --git a/mindspore/lite/CMakeLists.txt b/mindspore/lite/CMakeLists.txt index a8bf393c57e..3fbaefe0555 100644 --- a/mindspore/lite/CMakeLists.txt +++ b/mindspore/lite/CMakeLists.txt @@ -67,6 +67,10 @@ if(MACHINE_LINUX_ARM64) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=armv8-a+fp16") endif() +if(DEFINED ENV{MSLITE_ENABLE_CLOUD_FUSION_INFERENCE}) + set(MSLITE_ENABLE_CLOUD_FUSION_INFERENCE $ENV{MSLITE_ENABLE_CLOUD_FUSION_INFERENCE}) +endif() + if(DEFINED ENV{MSLITE_ENABLE_EXPERIMENTAL_KERNEL}) set(MSLITE_ENABLE_EXPERIMENTAL_KERNEL $ENV{MSLITE_ENABLE_EXPERIMENTAL_KERNEL}) endif() @@ -90,6 +94,10 @@ if(DEFINED ENV{MSLITE_ENABLE_TRAIN}) set(MSLITE_ENABLE_TRAIN $ENV{MSLITE_ENABLE_TRAIN}) endif() +if(MSLITE_ENABLE_CLOUD_FUSION_INFERENCE) + set(MSLITE_ENABLE_TRAIN OFF) +endif() + if(DEFINED ENV{MSLITE_ENABLE_SERVER_INFERENCE}) set(MSLITE_ENABLE_SERVER_INFERENCE $ENV{MSLITE_ENABLE_SERVER_INFERENCE}) endif() @@ -187,6 +195,10 @@ if(DEFINED ENV{MSLITE_ENABLE_MODEL_ENCRYPTION}) endif() endif() +if(MSLITE_ENABLE_CLOUD_FUSION_INFERENCE) + set(MSLITE_ENABLE_MODEL_ENCRYPTION ON) +endif() + if(DEFINED ENV{MSLITE_ENABLE_COVERAGE}) set(MSLITE_ENABLE_COVERAGE $ENV{MSLITE_ENABLE_COVERAGE}) endif() @@ -211,10 +223,6 @@ if(MSLITE_ENABLE_GITEE_MIRROR) set(ENABLE_GITEE ON) endif() -if(DEFINED ENV{MSLITE_ENABLE_CLOUD_FUSION_INFERENCE}) - set(MSLITE_ENABLE_CLOUD_FUSION_INFERENCE $ENV{MSLITE_ENABLE_CLOUD_FUSION_INFERENCE}) -endif() - if(DEFINED ENV{ENABLE_FAST_HASH_TABLE}) add_compile_definitions(ENABLE_FAST_HASH_TABLE) include_directories(${TOP_DIR}/third_party/robin_hood/include) diff --git a/mindspore/lite/src/common/mutable_tensor_impl.h b/mindspore/lite/src/common/mutable_tensor_impl.h new file mode 100644 index 00000000000..4c874fbc4fd --- /dev/null +++ b/mindspore/lite/src/common/mutable_tensor_impl.h @@ -0,0 +1,44 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef MINDSPORE_LITE_SRC_COMMON_MUTABLE_TESNOR_IMPL_H_ +#define MINDSPORE_LITE_SRC_COMMON_MUTABLE_TESNOR_IMPL_H_ + +#include +#include +#include +#include "ir/api_tensor_impl.h" + +namespace mindspore { +class MutableTensorImpl : public MSTensor::Impl { + public: + virtual void SetName(const std::string &name) = 0; + virtual void SetDataType(mindspore::DataType data_type) = 0; + virtual void SetShape(const std::vector &shape) = 0; + virtual mindspore::Format Format() const = 0; + virtual void SetFormat(mindspore::Format format) = 0; + virtual void SetData(void *data, bool own_data) = 0; + virtual bool IsConst() const = 0; + virtual void SetAllocator(const std::shared_ptr &allocator) = 0; + virtual std::shared_ptr GetAllocator() const = 0; + virtual std::vector GetQuantParams() const = 0; + virtual void SetQuantParams(const std::vector &quant_param) = 0; + virtual void SetDeviceData(void *data) = 0; + virtual void *GetDeviceData() = 0; +}; +using MutableTensorImplPtr = std::shared_ptr; +} // namespace mindspore +#endif // MINDSPORE_LITE_SRC_COMMON_MUTABLE_TESNOR_IMPL_H_ diff --git a/mindspore/lite/src/extendrt/cxx_api/model/model_impl.cc b/mindspore/lite/src/extendrt/cxx_api/model/model_impl.cc index 778773f4429..2cd35b867a4 100644 --- a/mindspore/lite/src/extendrt/cxx_api/model/model_impl.cc +++ b/mindspore/lite/src/extendrt/cxx_api/model/model_impl.cc @@ -89,21 +89,10 @@ std::vector ModelImpl::GetInputs() { std::vector inputs; auto graph_inputs = session_->GetInputs(); - auto graph_input_names = session_->GetInputNames(); for (size_t i = 0; i < graph_inputs.size(); i++) { - auto graph_input = graph_inputs[i]; - std::string graph_input_name = graph_input_names[i]; - auto type_id = graph_input->data_type_c(); - auto data_type = static_cast(type_id); - auto ms_tensor_ptr = MSTensor::CreateRefTensor(graph_input_name, data_type, graph_input->shape_c(), - graph_input->data_c(), graph_input->Size()); - if (ms_tensor_ptr == nullptr) { - MS_LOG_WARNING << "Failed to create input tensor "; - return {}; - } - inputs.push_back(*ms_tensor_ptr); - delete ms_tensor_ptr; + auto tensor_impl = graph_inputs[i]; + inputs.push_back(MSTensor(tensor_impl)); } return inputs; } @@ -111,23 +100,10 @@ std::vector ModelImpl::GetInputs() { std::vector ModelImpl::GetOutputs() { MS_EXCEPTION_IF_NULL(session_); std::vector outputs; - auto graph_outputs = session_->GetOutputs(); - auto graph_output_names = session_->GetOutputNames(); - for (size_t i = 0; i < graph_outputs.size(); i++) { - auto graph_output = graph_outputs[i]; - std::string graph_output_name = graph_output_names[i]; - auto type_id = graph_output->data_type_c(); - auto data_type = static_cast(type_id); - auto ms_tensor_ptr = MSTensor::CreateRefTensor(graph_output_name, data_type, graph_output->shape_c(), - graph_output->data_c(), graph_output->Size()); - if (ms_tensor_ptr == nullptr) { - MS_LOG_WARNING << "Failed to create output tensor "; - return {}; - } - outputs.push_back(*ms_tensor_ptr); - delete ms_tensor_ptr; + auto tensor_impl = graph_outputs[i]; + outputs.push_back(MSTensor(tensor_impl)); } return outputs; } @@ -137,17 +113,12 @@ MSTensor ModelImpl::GetInputByTensorName(const std::string &name) { MS_LOG(ERROR) << "Session is null."; return MSTensor(nullptr); } - auto tensor_ptr = session_->GetInputByTensorName(name); - if (tensor_ptr == nullptr) { + auto tensor_impl = session_->GetInputByTensorName(name); + if (tensor_impl == nullptr) { MS_LOG(ERROR) << "Model does not contains tensor " << name << " ."; return MSTensor(nullptr); } - auto 
@@ -137,17 +113,12 @@ MSTensor ModelImpl::GetInputByTensorName(const std::string &name) {
     MS_LOG(ERROR) << "Session is null.";
     return MSTensor(nullptr);
   }
-  auto tensor_ptr = session_->GetInputByTensorName(name);
-  if (tensor_ptr == nullptr) {
+  auto tensor_impl = session_->GetInputByTensorName(name);
+  if (tensor_impl == nullptr) {
     MS_LOG(ERROR) << "Model does not contain tensor " << name << " .";
     return MSTensor(nullptr);
   }
-  auto ms_inputs = TensorUtils::TensorPtrToMSTensor({tensor_ptr}, {name});
-  if (ms_inputs.empty()) {
-    MS_LOG(ERROR) << "Tensor to ms tensor failed." << name << " .";
-    return MSTensor(nullptr);
-  }
-  return ms_inputs[0];
+  return MSTensor(tensor_impl);
 }
 
 std::vector<std::string> ModelImpl::GetOutputTensorNames() {
@@ -164,35 +135,58 @@ MSTensor ModelImpl::GetOutputByTensorName(const std::string &name) {
     MS_LOG(ERROR) << "Session is null.";
     return MSTensor(nullptr);
   }
-  auto tensor_ptr = session_->GetOutputByTensorName(name);
-  if (tensor_ptr == nullptr) {
+  auto tensor_impl = session_->GetOutputByTensorName(name);
+  if (tensor_impl == nullptr) {
     MS_LOG(ERROR) << "Model does not contain tensor " << name << " .";
     return MSTensor(nullptr);
   }
-  auto ms_outputs = TensorUtils::TensorPtrToMSTensor({tensor_ptr}, {name});
-  if (ms_outputs.empty()) {
-    MS_LOG(ERROR) << "Tensor to ms tensor failed." << name << " .";
-    return MSTensor(nullptr);
-  }
-  return ms_outputs[0];
+  return MSTensor(tensor_impl);
 }
 
 Status ModelImpl::Predict(const std::vector<MSTensor> &inputs, std::vector<MSTensor> *outputs) {
   MS_EXCEPTION_IF_NULL(session_);
   MS_EXCEPTION_IF_NULL(outputs);
-  outputs->clear();
-  std::vector<tensor::TensorPtr> graph_inputs = TensorUtils::MSTensorToTensorPtr(inputs);
-  std::vector<tensor::TensorPtr> graph_outputs;
+  std::vector<tensor::Tensor> graph_inputs = TensorUtils::MSTensorToTensor(inputs);
+  std::vector<tensor::Tensor> graph_outputs;
+  std::vector<tensor::Tensor> org_graph_outputs;
+  if (!outputs->empty()) {
+    graph_outputs = TensorUtils::MSTensorToTensor(*outputs);
+    org_graph_outputs = graph_outputs;
+  }
   auto ret = session_->RunGraph(graph_inputs, &graph_outputs);
   if (ret != kSuccess) {
     MS_LOG(ERROR) << "ModelImpl::Predict RunGraph failed with " << ret;
     return ret;
   }
-  auto ms_outputs = TensorUtils::TensorPtrToMSTensor(graph_outputs, session_->GetOutputNames());
-  (void)std::copy(ms_outputs.begin(), ms_outputs.end(), std::back_inserter(*outputs));
+  if (outputs->empty() || org_graph_outputs != graph_outputs) {
+    *outputs = TensorUtils::TensorToMSTensor(graph_outputs, session_->GetOutputNames());
+  }
+  auto session_outputs = GetOutputs();
+  if (graph_outputs.size() != session_outputs.size()) {
+    MS_LOG(ERROR) << "Outputs count " << session_outputs.size() << " got from session != outputs count "
+                  << graph_outputs.size() << " of RunGraph";
+    return kCoreFailed;
+  }
+  for (size_t i = 0; i < session_outputs.size(); i++) {
+    auto &session_output = session_outputs[i];
+    auto &execute_output = outputs->at(i);
+    session_output.SetShape(execute_output.Shape());
+    if (session_output.Data().get() != execute_output.Data().get()) {
+      session_output.SetData(execute_output.MutableData(), false);
+    }
+    if (session_output.GetDeviceData() != execute_output.GetDeviceData()) {
+      session_output.SetDeviceData(execute_output.GetDeviceData());
+    }
+  }
   return kSuccess;
 }
 
+Status ModelImpl::Predict() {
+  auto inputs = GetInputs();
+  auto outputs = GetOutputs();
+  return Predict(inputs, &outputs);
+}
+
 bool ModelImpl::HasPreprocess() { return graph_->graph_data_->GetPreprocess().empty() ? false : true; }
 
 Status ModelImpl::Preprocess(const std::vector<std::vector<MSTensor>> &inputs, std::vector<MSTensor> *outputs) {
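Predict now preserves caller-supplied output tensors instead of always clearing them: when the outputs
arrive pre-bound to host or device memory, RunGraph writes through those buffers and the session outputs
are rebound to the same addresses afterwards. A hedged usage sketch (model, user_buffer and its size are
assumptions for illustration, not part of this patch):

    std::vector<mindspore::MSTensor> outputs = model.GetOutputs();
    // Bind a caller-owned host buffer; own_data = false so the tensor never frees it.
    outputs[0].SetData(user_buffer, false);
    auto status = model.Predict(model.GetInputs(), &outputs);  // results land in user_buffer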
diff --git a/mindspore/lite/src/extendrt/cxx_api/model_pool/model_worker.cc b/mindspore/lite/src/extendrt/cxx_api/model_pool/model_worker.cc
index 33a5723be75..eca0043b2f6 100644
--- a/mindspore/lite/src/extendrt/cxx_api/model_pool/model_worker.cc
+++ b/mindspore/lite/src/extendrt/cxx_api/model_pool/model_worker.cc
@@ -200,16 +200,21 @@ Status ModelWorker::Predict(const std::vector<MSTensor> &inputs, std::vector<MSTensor> *outputs,
   auto model_output = model_->GetOutputs();
   for (size_t i = 0; i < outputs->size(); i++) {
-    if (outputs->at(i).Data() != nullptr) {
+    auto &output = outputs->at(i);
+    if (output.Data() != nullptr || output.GetDeviceData() != nullptr) {
       /* user set graph-output-tensor from outside */
-      model_output[i].SetData(outputs->at(i).MutableData());
+      model_output[i].SetShape(output.Shape());
+      model_output[i].SetData(output.MutableData(), false);
+      model_output[i].SetDeviceData(output.GetDeviceData());
       model_output[i].SetAllocator(nullptr);
       need_copy_output = false;
     }
   }
   for (size_t i = 0; i < inputs.size(); i++) {
-    model_input[i].SetData(const_cast<MSTensor &>(inputs[i]).MutableData());
-    model_input[i].SetShape(inputs[i].Shape());
+    auto &input = inputs[i];
+    model_input[i].SetShape(input.Shape());
+    model_input[i].SetData(const_cast<MSTensor &>(input).MutableData(), false);
+    model_input[i].SetDeviceData(const_cast<MSTensor &>(input).GetDeviceData());
   }
   auto status = model_->Predict(model_input, &model_output, before, after);
   if (status != kSuccess) {
@@ -232,6 +237,7 @@ Status ModelWorker::Predict(const std::vector<MSTensor> &inputs, std::vector<MSTensor> *outputs,
     for (size_t i = 0; i < outputs->size(); i++) {
       outputs->at(i).SetShape(model_output[i].Shape());
       model_output[i].SetData(nullptr);
+      model_output[i].SetDeviceData(nullptr);
       model_output[i].SetAllocator(nullptr);
     }
   }
diff --git a/mindspore/lite/src/extendrt/cxx_api/types.cc b/mindspore/lite/src/extendrt/cxx_api/types.cc
index 341f906d8a6..f20181cd246 100644
--- a/mindspore/lite/src/extendrt/cxx_api/types.cc
+++ b/mindspore/lite/src/extendrt/cxx_api/types.cc
@@ -20,6 +20,11 @@
 #include "mindspore/core/ir/api_tensor_impl.h"
 #include "mindspore/core/utils/convert_utils_base.h"
 #include "utils/file_utils.h"
+#include "common/utils.h"
+#include "mindspore/core/ir/tensor.h"
+#include "runtime/device/device_address.h"
+#include "extendrt/utils/tensor_utils.h"
+#include "extendrt/utils/tensor_default_impl.h"
 
 namespace mindspore {
 class Buffer::Impl {
@@ -71,105 +76,12 @@ class Buffer::Impl {
   std::vector<char> data_;
 };
 
-class MutableTensorImpl : public MSTensor::Impl {
- public:
-  MutableTensorImpl() = default;
-  MutableTensorImpl(const std::string &name, enum DataType type, const std::vector<int64_t> &shape)
-      : name_(name), type_(type), shape_(shape) {}
-
-  virtual void SetData(void *data) = 0;
-
-  void SetShape(const std::vector<int64_t> &shape) { shape_ = shape; }
-  void SetDataType(mindspore::DataType data_type) { type_ = data_type; }
-  void SetTensorName(const std::string &name) { name_ = name; }
-
-  mindspore::Format GetFormat() const { return format_; }
-  void SetFormat(mindspore::Format format) { format_ = format; }
-
-  const std::string &Name() const override { return name_; }
-  enum DataType DataType() const override { return type_; }
-  const std::vector<int64_t> &Shape() const override { return shape_; }
-
-  void SetAllocator(const std::shared_ptr<Allocator> &allocator) { allocator_ = allocator; }
-  std::shared_ptr<Allocator> GetAllocator() const { return allocator_; }
-
-  std::vector<QuantParam> QuantParams() const { return quant_param_; }
-
-  void SetQuantParams(const std::vector<QuantParam> &quant_param) { quant_param_ = quant_param; }
-
- protected:
-  std::string name_;
-  enum DataType type_ = DataType::kTypeUnknown;
-  enum Format format_ = mindspore::NCHW;
-  std::vector<int64_t> shape_;
-  std::shared_ptr<Allocator> allocator_ = nullptr;
-  std::vector<QuantParam> quant_param_;
-};
-
-class TensorDefaultImpl : public MutableTensorImpl {
- public:
-  TensorDefaultImpl() : buffer_() {}
-  ~TensorDefaultImpl() override = default;
-  TensorDefaultImpl(const std::string &name, enum DataType type, const std::vector<int64_t> &shape, const void *data,
-                    size_t data_len)
-      : MutableTensorImpl(name, type, shape), buffer_(data, data_len) {}
-
-  std::shared_ptr<const void> Data() const override {
-    return std::shared_ptr<const void>(buffer_.Data(), [](const void *) {});
-  }
-
-  void SetData(void *data) override {
-    auto data_len = buffer_.DataSize();
-    buffer_.SetData(data, data_len);
-  }
-
-  void *MutableData() override { return buffer_.MutableData(); }
-  size_t DataSize() const override { return buffer_.DataSize(); }
-
-  bool IsDevice() const override { return false; }
-
-  std::shared_ptr<Impl> Clone() const override {
-    return std::make_shared<TensorDefaultImpl>(name_, type_, shape_, buffer_.Data(), buffer_.DataSize());
-  }
-
- private:
-  Buffer buffer_;
-};
-
-class TensorReferenceImpl : public MutableTensorImpl {
- public:
-  TensorReferenceImpl() = default;
-  ~TensorReferenceImpl() override = default;
-  TensorReferenceImpl(const std::string &name, enum DataType type, const std::vector<int64_t> &shape, const void *data,
-                      size_t data_len, bool is_device)
-      : MutableTensorImpl(name, type, shape), data_(data), data_size_(data_len), is_device_(is_device) {}
-
-  std::shared_ptr<const void> Data() const override {
-    return std::shared_ptr<const void>(data_, [](const void *) {});
-  }
-
-  void SetData(void *data) override { data_ = data; }
-
-  void *MutableData() override { return const_cast<void *>(data_); }
-  size_t DataSize() const override { return data_size_; }
-
-  bool IsDevice() const override { return is_device_; }
-
-  std::shared_ptr<Impl> Clone() const override {
-    return std::make_shared<TensorReferenceImpl>(name_, type_, shape_, data_, data_size_, is_device_);
-  }
-
- protected:
-  const void *data_ = nullptr;
-  size_t data_size_ = 0;
-  bool is_device_ = false;
-};
-
 MSTensor *MSTensor::CreateTensor(const std::vector<char> &name, enum DataType type, const std::vector<int64_t> &shape,
                                  const void *data, size_t data_len) noexcept {
   std::string name_str = CharToString(name);
   try {
-    std::shared_ptr<Impl> impl = std::make_shared<TensorDefaultImpl>(name_str, type, shape, data, data_len);
+    std::shared_ptr<Impl> impl =
+      std::make_shared<TensorDefaultImpl>(name_str, type, shape, data, data_len, false, false);
     MSTensor *ret = new MSTensor(impl);
     return ret;
   } catch (const std::bad_alloc &) {
@@ -182,10 +94,17 @@ MSTensor *MSTensor::CreateTensor(const std::vector<char> &name, enum DataType ty
 }
 
 MSTensor *MSTensor::CreateRefTensor(const std::vector<char> &name, enum DataType type,
-                                    const std::vector<int64_t> &shape, const void *data, size_t data_len) noexcept {
+                                    const std::vector<int64_t> &shape, const void *data, size_t data_len,
+                                    bool own_data) noexcept {
   std::string name_str = CharToString(name);
   try {
-    std::shared_ptr<Impl> impl = std::make_shared<TensorReferenceImpl>(name_str, type, shape, data, data_len, false);
+    std::shared_ptr<Impl> impl =
+      std::make_shared<TensorDefaultImpl>(name_str, type, shape, data, data_len, true, own_data);
+    if (data_len < impl->DataSize()) {
+      MS_LOG(ERROR) << "The size " << data_len << " of data cannot be less than the memory size required by the shape "
+                    << shape << " and data type " << TypeIdToString(static_cast<TypeId>(type));
+      return nullptr;
+    }
     MSTensor *ret = new MSTensor(impl);
     return ret;
   } catch (const std::bad_alloc &) {
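CreateRefTensor now rejects buffers smaller than the shape requires and lets the caller keep ownership
through own_data; the CreateDeviceTensor variant in the next hunk returns a value object bound to
caller-managed device memory. A hedged sketch of both (the host vector, device_ptr and device_size are
illustrative assumptions, not code from this patch):

    std::vector<float> host(1 * 3 * 32 * 32);
    auto *ref = mindspore::MSTensor::CreateRefTensor("x", mindspore::DataType::kNumberTypeFloat32, {1, 3, 32, 32},
                                                     host.data(), host.size() * sizeof(float), false);
    // own_data = false: DestroyTensorPtr releases only the wrapper, never host.data()
    mindspore::MSTensor::DestroyTensorPtr(ref);
    auto dev = mindspore::MSTensor::CreateDeviceTensor("y", mindspore::DataType::kNumberTypeFloat32, {1, 3, 32, 32},
                                                       device_ptr, device_size);
    void *bound = dev.GetDeviceData();  // the same caller-managed device address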
@@ -197,19 +116,24 @@ MSTensor *MSTensor::CreateRefTensor(const std::vector<char> &name, enum DataType
   }
 }
 
-MSTensor *MSTensor::CreateDevTensor(const std::vector<char> &name, enum DataType type,
-                                    const std::vector<int64_t> &shape, const void *data, size_t data_len) noexcept {
+MSTensor MSTensor::CreateDeviceTensor(const std::vector<char> &name, enum DataType type,
+                                      const std::vector<int64_t> &shape, void *data, size_t data_size) noexcept {
   std::string name_str = CharToString(name);
   try {
-    std::shared_ptr<Impl> impl = std::make_shared<TensorReferenceImpl>(name_str, type, shape, data, data_len, true);
-    MSTensor *ret = new MSTensor(impl);
-    return ret;
+    auto impl = std::make_shared<TensorDefaultImpl>(name_str, type, shape);
+    if (data_size < impl->DataSize()) {
+      MS_LOG(ERROR) << "The size " << data_size << " of data cannot be less than the memory size required by the shape "
+                    << shape << " and data type " << TypeIdToString(static_cast<TypeId>(type));
+      return MSTensor(nullptr);
+    }
+    impl->SetDeviceData(data);
+    return MSTensor(impl);
   } catch (const std::bad_alloc &) {
     MS_LOG(ERROR) << "Malloc memory failed.";
-    return nullptr;
+    return MSTensor(nullptr);
   } catch (...) {
     MS_LOG(ERROR) << "Unknown error occurred.";
-    return nullptr;
+    return MSTensor(nullptr);
   }
 }
@@ -399,13 +323,17 @@ MSTensor::MSTensor(std::nullptr_t) : impl_(nullptr) {}
 MSTensor::MSTensor(const std::shared_ptr<Impl> &impl) : impl_(impl) { MS_EXCEPTION_IF_NULL(impl); }
 MSTensor::MSTensor(const std::vector<char> &name, enum DataType type, const std::vector<int64_t> &shape,
                    const void *data, size_t data_len)
-    : impl_(std::make_shared<TensorDefaultImpl>(CharToString(name), type, shape, data, data_len)) {}
+    : impl_(std::make_shared<TensorDefaultImpl>(CharToString(name), type, shape, data, data_len, false, false)) {}
 MSTensor::~MSTensor() = default;
 
 bool MSTensor::operator==(std::nullptr_t) const { return impl_ == nullptr; }
 
 bool MSTensor::operator!=(std::nullptr_t) const { return impl_ != nullptr; }
 
+bool MSTensor::operator==(const MSTensor &tensor) const { return impl_ == tensor.impl_; }
+
+bool MSTensor::operator!=(const MSTensor &tensor) const { return impl_ != tensor.impl_; }
+
 MSTensor *MSTensor::Clone() const {
   MS_EXCEPTION_IF_NULL(impl_);
   try {
@@ -478,7 +406,7 @@ void MSTensor::SetDataType(enum DataType data_type) {
 
 void MSTensor::SetTensorName(const std::vector<char> &tensor_name) {
   MS_EXCEPTION_IF_NULL(impl_);
-  std::static_pointer_cast<MutableTensorImpl>(impl_)->SetTensorName(CharToString(tensor_name));
+  std::static_pointer_cast<MutableTensorImpl>(impl_)->SetName(CharToString(tensor_name));
 }
 
 void MSTensor::SetAllocator(std::shared_ptr<Allocator> allocator) {
@@ -498,17 +426,27 @@ void MSTensor::SetFormat(mindspore::Format format) {
 
 mindspore::Format MSTensor::format() const {
   MS_EXCEPTION_IF_NULL(impl_);
-  return std::static_pointer_cast<MutableTensorImpl>(impl_)->GetFormat();
+  return std::static_pointer_cast<MutableTensorImpl>(impl_)->Format();
 }
 
-void MSTensor::SetData(void *data) {
+void MSTensor::SetData(void *data, bool own_data) {
   MS_EXCEPTION_IF_NULL(impl_);
-  std::static_pointer_cast<MutableTensorImpl>(impl_)->SetData(data);
+  std::static_pointer_cast<MutableTensorImpl>(impl_)->SetData(data, own_data);
+}
+
+void MSTensor::SetDeviceData(void *data) {
+  MS_EXCEPTION_IF_NULL(impl_);
+  std::static_pointer_cast<MutableTensorImpl>(impl_)->SetDeviceData(data);
+}
+
+void *MSTensor::GetDeviceData() {
+  MS_EXCEPTION_IF_NULL(impl_);
+  return std::static_pointer_cast<MutableTensorImpl>(impl_)->GetDeviceData();
 }
 
 std::vector<QuantParam> MSTensor::QuantParams() const {
   MS_EXCEPTION_IF_NULL(impl_);
-  return std::static_pointer_cast<MutableTensorImpl>(impl_)->QuantParams();
+  return std::static_pointer_cast<MutableTensorImpl>(impl_)->GetQuantParams();
 }
 
 void MSTensor::SetQuantParams(std::vector<QuantParam> quant_param) {
diff --git a/mindspore/lite/src/extendrt/delegate/tensorrt/distribution/distribution_base.cc
b/mindspore/lite/src/extendrt/delegate/tensorrt/distribution/distribution_base.cc index 3fc43580f61..adb63694b26 100644 --- a/mindspore/lite/src/extendrt/delegate/tensorrt/distribution/distribution_base.cc +++ b/mindspore/lite/src/extendrt/delegate/tensorrt/distribution/distribution_base.cc @@ -15,9 +15,10 @@ */ #include "src/extendrt/delegate/tensorrt/distribution/distribution_base.h" +#include "src/extendrt/delegate/plugin/tensorrt_executor_plugin.h" namespace mindspore::lite { -int GetGPUGroupSize() { return 1; } +int GetGPUGroupSize() { return TensorRTPlugin::GetInstance().GetGPUGroupSize(); } -int GetRankID() { return 0; } +int GetRankID() { return TensorRTPlugin::GetInstance().GetRankID(); } } // namespace mindspore::lite diff --git a/mindspore/lite/src/extendrt/delegate/tensorrt/tensor_info.cc b/mindspore/lite/src/extendrt/delegate/tensorrt/tensor_info.cc index f5d7259dfd4..38547b267ec 100644 --- a/mindspore/lite/src/extendrt/delegate/tensorrt/tensor_info.cc +++ b/mindspore/lite/src/extendrt/delegate/tensorrt/tensor_info.cc @@ -38,13 +38,29 @@ class TensorInfoImpl { tensor_val_(tensor_val) { is_const_ = (data_ != nullptr); if (data_ == nullptr || data_len_ == 0) { - auto ele_num = std::accumulate(shape_.begin(), shape_.end(), 1, std::multiplies()); - auto type_size = DataTypeSize(static_cast(dType_)); + auto ele_num = ElementNum(); + auto type_size = item_size(); temp_data_.resize(ele_num * type_size); data_ = temp_data_.data(); data_len_ = temp_data_.size(); } } + void SetShape(const std::vector &shape) { + shape_ = shape; + auto new_elem_num = ElementNum(); + auto type_size = item_size(); + auto data_size = new_elem_num * type_size; + if (data_size != temp_data_.size() && data_ == temp_data_.data()) { + temp_data_.resize(data_size); + data_ = temp_data_.data(); + data_len_ = data_size; + } + } + + int64_t ElementNum() const { return std::accumulate(shape_.begin(), shape_.end(), 1, std::multiplies()); } + + size_t item_size() const { return DataTypeSize(static_cast(dType_)); } + std::string name_; mindspore::DataType dType_ = mindspore::DataType::kTypeUnknown; std::vector shape_; @@ -109,7 +125,7 @@ size_t TensorInfo::DataSize() const { if (impl_ == nullptr) { return 0; } - return impl_->data_len_; + return ElementNum() * item_size(); } bool TensorInfo::IsConst() const { @@ -119,13 +135,18 @@ bool TensorInfo::IsConst() const { return impl_->is_const_ && impl_->data_ != nullptr; } -size_t TensorInfo::item_size() const { return DataTypeSize(static_cast(DataType())); } +size_t TensorInfo::item_size() const { + if (impl_ == nullptr) { + return 0; + } + return impl_->item_size(); +} void TensorInfo::SetShape(const std::vector &shape) { if (impl_ == nullptr) { return; } - impl_->shape_ = shape; + impl_->SetShape(shape); } void TensorInfo::SetData(const void *data, size_t data_len) { @@ -140,11 +161,7 @@ int64_t TensorInfo::ElementNum() const { if (impl_ == nullptr) { return 0; } - if (impl_->shape_.empty()) { - // element number of scalar is 1 - return 1; - } - return std::accumulate(impl_->shape_.begin(), impl_->shape_.end(), 1, std::multiplies()); + return impl_->ElementNum(); } TensorInfo &TensorInfo::operator=(const TensorInfo &other) { diff --git a/mindspore/lite/src/extendrt/delegate/tensorrt/tensorrt_allocator.cc b/mindspore/lite/src/extendrt/delegate/tensorrt/tensorrt_allocator.cc index 30cf29bc34e..4bf21a9f2b3 100644 --- a/mindspore/lite/src/extendrt/delegate/tensorrt/tensorrt_allocator.cc +++ b/mindspore/lite/src/extendrt/delegate/tensorrt/tensorrt_allocator.cc @@ -106,6 
+106,33 @@ int TensorRTAllocator::SyncMemDeviceToHost(tensor::Tensor *host_tensor, const st
   return SyncMemInHostAndDevice(host_tensor->data_c(), device_tensor_name, host_tensor->Size(), false, sync);
 }
 
+int TensorRTAllocator::SyncMemDeviceToHost(void *dst_data, size_t data_size, const std::string &device_tensor_name) {
+  if (dst_data == nullptr) {
+    MS_LOG(ERROR) << "dst host data cannot be nullptr.";
+    return RET_ERROR;
+  }
+  auto it = cuda_tensor_map_.find(device_tensor_name);
+  if (it == cuda_tensor_map_.end()) {
+    MS_LOG(ERROR) << "cannot find device address " << device_tensor_name;
+    return RET_ERROR;
+  }
+  CudaTensorParam &current_cuda_tensor = it->second;
+  // is memcpy from device to host, the host mem is valid, change tag for mem pool.
+  current_cuda_tensor.is_valid_mem = true;
+  auto device_ptr = current_cuda_tensor.data;
+  if (device_ptr == nullptr) {
+    MS_LOG(ERROR) << "device_ptr is null for " << device_tensor_name;
+    return RET_ERROR;
+  }
+  auto cuda_ret = cudaMemcpy(dst_data, device_ptr, data_size, cudaMemcpyDeviceToHost);
+  if (cuda_ret != cudaSuccess) {
+    MS_LOG(ERROR) << "copy mem failed, ret " << cudaGetErrorName(cuda_ret);
+    return RET_ERROR;
+  }
+  MS_LOG(INFO) << "cuda memcpy success for " << device_tensor_name;
+  return RET_OK;
+}
+
 int TensorRTAllocator::SyncMemInHostAndDevice(tensor::Tensor *host_tensor, const std::string &device_tensor_name,
                                               bool is_host2device, bool sync) {
   if (host_tensor == NULL) {
diff --git a/mindspore/lite/src/extendrt/delegate/tensorrt/tensorrt_allocator.h b/mindspore/lite/src/extendrt/delegate/tensorrt/tensorrt_allocator.h
index 412787168a2..53655d1e53d 100644
--- a/mindspore/lite/src/extendrt/delegate/tensorrt/tensorrt_allocator.h
+++ b/mindspore/lite/src/extendrt/delegate/tensorrt/tensorrt_allocator.h
@@ -54,6 +54,7 @@ class TensorRTAllocator {
 
   int SyncMemHostToDevice(const tensor::Tensor &host_tensor, const std::string &device_tensor_name, bool sync = true);
   int SyncMemDeviceToHost(tensor::Tensor *host_tensor, const std::string &device_tensor_name, bool sync = true);
+  int SyncMemDeviceToHost(void *dst_data, size_t data_size, const std::string &device_tensor_name);
 
   int ClearDeviceMem();
diff --git a/mindspore/lite/src/extendrt/delegate/tensorrt/tensorrt_graph_executor.cc b/mindspore/lite/src/extendrt/delegate/tensorrt/tensorrt_graph_executor.cc
index 326e7838102..3395f107efc 100644
--- a/mindspore/lite/src/extendrt/delegate/tensorrt/tensorrt_graph_executor.cc
+++ b/mindspore/lite/src/extendrt/delegate/tensorrt/tensorrt_graph_executor.cc
@@ -295,29 +295,19 @@ Status GetModelOutputsInfo(KernelGraphPtr kernel_graph, std::vector<TensorInfo>
-  auto kernel_graph_outputs = kernel_graph->outputs();
+  auto outputs = kernel_graph->outputs();
   // find parameters of graph inputs
-  for (size_t i = 0; i < kernel_graph_outputs.size(); ++i) {
-    auto output = kernel_graph_outputs[i];
+  for (size_t i = 0; i < outputs.size(); ++i) {
+    auto output = outputs[i];
     auto cur_abstract = output->abstract();
+    size_t output_num = 1;
     if (cur_abstract->isa<abstract::AbstractTuple>()) {
       auto abs_tuple = cur_abstract->Clone()->cast<abstract::AbstractTuplePtr>();
       MS_EXCEPTION_IF_NULL(abs_tuple);
-      size_t output_num = abs_tuple->elements().size();
-      for (size_t output_idx = 0; output_idx < output_num; ++output_idx) {
-        auto tensor_id = common::AnfAlgo::VisitKernelWithReturnType(output, output_idx);
-        auto it =
-          std::find_if(tensor_info_list.begin(), tensor_info_list.end(),
-                       [&tensor_id](const NodeWithOutputIndex &index) { return index.kernel_index == tensor_id; });
-        if (it != tensor_info_list.end()) {
-          output_tensors->push_back(it->tensor_info);
-        } else {
-          MS_LOG_ERROR << "Cannot find output tensor info " << tensor_id.first->fullname_with_scope();
-          return mindspore::kLiteError;
-        }
-      }
-    } else {
-      auto tensor_id = common::AnfAlgo::VisitKernelWithReturnType(output, 0);
+      output_num = abs_tuple->elements().size();
+    }
+    for (size_t output_idx = 0; output_idx < output_num; ++output_idx) {
+      auto tensor_id = common::AnfAlgo::VisitKernelWithReturnType(output, output_idx);
       auto it =
         std::find_if(tensor_info_list.begin(), tensor_info_list.end(),
                      [&tensor_id](const NodeWithOutputIndex &index) { return index.kernel_index == tensor_id; });
@@ -443,12 +433,6 @@ Status TensorRTExecutor::BuildSubGraph(const KernelGraphPtr &kernel_graph) {
   if (status != kSuccess) {
     return status;
   }
-  auto build_trt_graph = [kernel_graph](const std::vector<TensorRTOp *> &tensorrt_ops) {
-    auto inputs = GraphInTensors(tensorrt_ops);
-    auto outputs = GraphOutTensors(tensorrt_ops);
-    auto ctx = TrtGraphContext{tensorrt_ops, inputs, outputs, nullptr};
-    return ctx;
-  };
   for (const auto &kernel_node : kernel_nodes) {
     auto node_name = kernel_node->fullname_with_scope();
     std::string kernel_name = common::AnfAlgo::GetCNodeName(kernel_node);
@@ -468,11 +452,11 @@ Status TensorRTExecutor::BuildSubGraph(const KernelGraphPtr &kernel_graph) {
     tensorrt_op->SetRuntime(this->runtime_);
     tensorrt_ops.push_back(tensorrt_op);
   }
-  if (!tensorrt_ops.empty()) {
-    auto trt_ctx = build_trt_graph(tensorrt_ops);
-    tensorrt_ops.clear();
-    tensorrt_graph_list_.push_back(trt_ctx);
+  status = GetModelOutputsInfo(kernel_graph, &tensor_info_list, &outputs_);
+  if (status != kSuccess) {
+    return status;
   }
+  tensorrt_graph_list_.push_back(TrtGraphContext{tensorrt_ops, inputs_, outputs_, nullptr});
   status = UpdateTrtSubGraphInputsDepend();
   if (status != kSuccess) {
     return status;
@@ -486,10 +470,6 @@ Status TensorRTExecutor::BuildSubGraph(const KernelGraphPtr &kernel_graph) {
       return mindspore::kLiteError;
     }
   }
-  status = GetModelOutputsInfo(kernel_graph, &tensor_info_list, &outputs_);
-  if (status != kSuccess) {
-    return status;
-  }
   return mindspore::kSuccess;
 }
@@ -625,20 +605,27 @@ bool TensorRTExecutor::RunGraph(const FuncGraphPtr &graph, const std::vector<tensor::Tensor> &inputs,
+  if (!outputs->empty() && outputs_.size() != outputs->size()) {
+    MS_LOG(ERROR) << "Graph outputs size " << outputs_.size() << " != expected outputs size " << outputs->size();
+    return false;
+  }
+  if (tensorrt_graph_list_.size() == 1) {
+    return tensorrt_graph_list_[0].sub_graph->Execute(inputs, outputs) == RET_OK;
+  }
+  std::map<TensorInfo, std::shared_ptr<tensor::Tensor>> tensor_val_map;
   for (size_t i = 0; i < inputs.size(); i++) {
-    tensor_val_map_[inputs_[i]] = std::make_shared<tensor::Tensor>(inputs[i]);
+    tensor_val_map[inputs_[i]] = std::make_shared<tensor::Tensor>(inputs[i]);
   }
   for (auto &sub_graph : tensorrt_graph_list_) {
     std::vector<tensor::Tensor> sub_inputs;
     std::vector<tensor::Tensor> sub_outputs;
     for (auto &item : sub_graph.inputs) {
-      auto it = tensor_val_map_.find(item);
-      if (it == tensor_val_map_.end()) {
+      auto it = tensor_val_map.find(item);
+      if (it == tensor_val_map.end()) {
         MS_LOG(ERROR) << "Cannot find input tensor " << item.Name() << " in tensor val map";
         return false;
       }
@@ -659,12 +646,13 @@ bool TensorRTExecutor::RunGraph(const FuncGraphPtr &graph, const std::vector<tensor::Tensor> &inputs,
-      tensor_val_map_[sub_graph.outputs[i]] = std::make_shared<tensor::Tensor>(sub_outputs[i]);
+      tensor_val_map[sub_graph.outputs[i]] = std::make_shared<tensor::Tensor>(sub_outputs[i]);
     }
   }
+  outputs->clear();
   for (auto &item : outputs_) {
-    auto it = tensor_val_map_.find(item);
-    if (it == tensor_val_map_.end()) {
+    auto it = tensor_val_map.find(item);
+    if (it == tensor_val_map.end()) {
       MS_LOG(ERROR) << "Cannot find output tensor " << item.Name() << " in tensor val map";
       return false;
     }
diff --git
a/mindspore/lite/src/extendrt/delegate/tensorrt/tensorrt_graph_executor.h b/mindspore/lite/src/extendrt/delegate/tensorrt/tensorrt_graph_executor.h index 04df4288630..0267c5d1a66 100644 --- a/mindspore/lite/src/extendrt/delegate/tensorrt/tensorrt_graph_executor.h +++ b/mindspore/lite/src/extendrt/delegate/tensorrt/tensorrt_graph_executor.h @@ -81,7 +81,6 @@ class TensorRTExecutor : public device::GraphExecutor { cudaStream_t stream_{nullptr}; std::vector kernel_list_; - std::map> tensor_val_map_; std::vector tensorrt_graph_list_; std::vector min_dims_; diff --git a/mindspore/lite/src/extendrt/delegate/tensorrt/tensorrt_subgraph.cc b/mindspore/lite/src/extendrt/delegate/tensorrt/tensorrt_subgraph.cc index c2068a738b6..5f761d38312 100644 --- a/mindspore/lite/src/extendrt/delegate/tensorrt/tensorrt_subgraph.cc +++ b/mindspore/lite/src/extendrt/delegate/tensorrt/tensorrt_subgraph.cc @@ -25,6 +25,7 @@ #include #include #include "src/extendrt/delegate/delegate_utils.h" +#include "src/common/utils.h" #include "ops/transpose.h" #include "ops/reshape.h" @@ -482,24 +483,16 @@ int TensorRTSubGraph::Prepare() { return RET_ERROR; } } - for (auto tensor : outputs_) { + for (auto &tensor : outputs_) { int index = this->engine_->getBindingIndex(tensor.Name().c_str()); auto out_dims = trt_context_->getBindingDimensions(index); int elem_num = std::accumulate(out_dims.d, out_dims.d + out_dims.nbDims, 1, std::multiplies()); DebugDims(out_dims); - std::map TypeByte = { - {DataType::kTypeUnknown, 0}, {DataType::kObjectTypeString, 0}, {DataType::kNumberTypeBool, 1}, - {DataType::kNumberTypeInt8, 1}, {DataType::kNumberTypeInt16, 2}, {DataType::kNumberTypeInt32, 4}, - {DataType::kNumberTypeInt64, 8}, {DataType::kNumberTypeUInt8, 1}, {DataType::kNumberTypeUInt16, 2}, - {DataType::kNumberTypeUInt32, 4}, {DataType::kNumberTypeUInt64, 8}, {DataType::kNumberTypeFloat16, 2}, - {DataType::kNumberTypeFloat32, 4}, {DataType::kNumberTypeFloat64, 8}, - }; - if (tensor.Data() == nullptr) { - MS_LOG(INFO) << "Set output shape by tensorrt binding output"; - tensor.SetShape(lite::ConvertMSShape(out_dims)); - tensor.MutableData(); - } - auto device_ptr = runtime_->GetAllocator()->MallocDeviceMem(tensor, elem_num * TypeByte[tensor.DataType()]); + auto new_shape = lite::ConvertMSShape(out_dims); + MS_LOG(INFO) << "Set output shape of " << tensor.Name() << " to " << new_shape << " by tensorrt binding output"; + tensor.SetShape(new_shape); + auto type_size = DataTypeSize(static_cast(tensor.DataType())); + auto device_ptr = runtime_->GetAllocator()->MallocDeviceMem(tensor, elem_num * type_size); if (device_ptr == nullptr) { MS_LOG(ERROR) << "malloc for outputs tensor device memory failed."; return RET_ERROR; @@ -510,37 +503,20 @@ int TensorRTSubGraph::Prepare() { return RET_OK; } -int TensorRTSubGraph::ReSizeIfNeed(const std::vector &inputs) { - bool need_resize = false; +int TensorRTSubGraph::OnNewInputShapes(const std::vector &inputs) { if (inputs_.size() != inputs.size()) { MS_LOG(ERROR) << "Graph inputs size " << inputs_.size() << " != resize input size " << inputs.size(); return RET_ERROR; } - for (size_t i = 0; i < inputs_.size(); i++) { - if (inputs_[i].Shape() != inputs[i].shape()) { - need_resize = true; - break; - } - } - if (need_resize) { - return ReSize(inputs); - } - return RET_OK; -} - -int TensorRTSubGraph::ReSize(const std::vector &inputs) { - if (inputs_.size() != inputs.size()) { - MS_LOG(ERROR) << "Graph inputs size " << inputs_.size() << " != resize input size " << inputs.size(); - return RET_ERROR; - } - if 
(input_batchsize_index_ == -1) { - MS_LOG(ERROR) << "current network don't support resize."; - return RET_ERROR; - } + int batch_size = -1; for (size_t i = 0; i < trt_in_tensor_name_.size(); i++) { if (inputs_[i].Shape() == inputs[i].shape()) { continue; } + if (input_batchsize_index_ == -1) { + MS_LOG(ERROR) << "current network don't support resize."; + return RET_ERROR; + } inputs_[i].SetShape(inputs[i].shape()); if (ctx_->network() != nullptr) { for (int j = 0; j < ctx_->network()->getNbInputs(); j++) { @@ -558,18 +534,16 @@ int TensorRTSubGraph::ReSize(const std::vector &inputs) { MS_LOG(INFO) << "resize at input_batch_index " << input_batchsize_index_ << ", update batch size to " << inputs_[i].Shape()[input_batchsize_index_]; - runtime_->SetBatchSize(inputs_[i].Shape()[input_batchsize_index_]); - - // inputs_ is dupulated by mindrt, name is untustable. - auto device_ptr = runtime_->GetAllocator()->MallocDeviceMem(trt_in_tensor_name_[i], inputs_[i].DataSize(), - ConvertDataType(inputs_[i].DataType())); - if (device_ptr == nullptr) { - MS_LOG(ERROR) << "realloc for input tensor device memory failed."; + int new_batch_size = inputs_[i].Shape()[input_batchsize_index_]; + if (batch_size != -1 && batch_size != new_batch_size) { + MS_LOG(ERROR) << "Batch size " << batch_size << " of input 0 != batch size " << new_batch_size << " of input " + << i; return RET_ERROR; } + batch_size = new_batch_size; + runtime_->SetBatchSize(batch_size); + int index = this->engine_->getBindingIndex(trt_in_tensor_name_[i].c_str()); - MS_LOG(INFO) << "device index " << index << " for tensor : " << trt_in_tensor_name_[i] << " attr: " << device_ptr; - tensor_bindings_[index] = device_ptr; // Set actual input size nvinfer1::Dims input_dims = ConvertCudaDims(inputs_[i].Shape()); for (int od = 0; od < input_dims.nbDims; od++) { @@ -585,6 +559,140 @@ int TensorRTSubGraph::ReSize(const std::vector &inputs) { MS_LOG(ERROR) << "input dims need to be specified."; return RET_ERROR; } + if (batch_size != -1) { + for (size_t i = 0; i < trt_out_tensor_name_.size(); i++) { + int index = this->engine_->getBindingIndex(trt_out_tensor_name_[i].c_str()); + auto out_dims = trt_context_->getBindingDimensions(index); + auto new_shape = lite::ConvertMSShape(out_dims); + MS_LOG(INFO) << "Set output shape of " << trt_out_tensor_name_[i] << " to " << new_shape + << " by tensorrt binding output"; + outputs_[i].SetShape(new_shape); + } + } + return RET_OK; +} + +int TensorRTSubGraph::PreExecute(const std::vector &inputs, + const std::vector &outputs) { + if (inputs_.size() != inputs.size()) { + MS_LOG(ERROR) << "Graph inputs size " << inputs_.size() << " != execute inputs size " << inputs.size(); + return RET_ERROR; + } + if (!outputs.empty() && outputs.size() != outputs_.size()) { + MS_LOG(ERROR) << "Graph outputs size " << outputs_.size() << " != execute outputs size " << outputs.size(); + return RET_ERROR; + } + auto ret = OnNewInputShapes(inputs); + if (ret != RET_OK) { + return ret; + } + for (size_t i = 0; i < trt_in_tensor_name_.size(); i++) { + auto trt_tensor_name = trt_in_tensor_name_[i]; + void *device_ptr = nullptr; + auto input_device_address = inputs[i].device_address(); + if (input_device_address != nullptr && input_device_address->GetMutablePtr() != nullptr) { + device_ptr = input_device_address->GetMutablePtr(); + } else { + device_ptr = runtime_->GetAllocator()->MallocDeviceMem(trt_tensor_name, inputs_[i].DataSize(), + ConvertDataType(inputs_[i].DataType())); + if (device_ptr == nullptr) { + MS_LOG(ERROR) << "realloc for 
input tensor device memory failed."; + return RET_ERROR; + } + ret = runtime_->GetAllocator()->SyncMemHostToDevice(inputs[i], trt_tensor_name); + if (ret != RET_OK) { + MS_LOG(ERROR) << "sync mem from host to device failed for " << trt_tensor_name; + return RET_ERROR; + } + runtime_->GetAllocator()->MarkMemValid(trt_tensor_name, true); + } + int index = this->engine_->getBindingIndex(trt_tensor_name.c_str()); + MS_LOG(INFO) << "device index " << index << " for tensor : " << trt_tensor_name << " attr: " << device_ptr; + tensor_bindings_[index] = device_ptr; + } + for (size_t i = 0; i < trt_out_tensor_name_.size(); i++) { + const auto &trt_out_tensor_name = trt_out_tensor_name_[i]; + int index = this->engine_->getBindingIndex(trt_out_tensor_name.c_str()); + void *device_ptr = nullptr; + if (outputs.size() > i) { + auto &output = outputs[i]; + if (output.device_address() && output.device_address()->GetMutablePtr()) { + device_ptr = output.device_address()->GetMutablePtr(); + } + } + if (!device_ptr) { + device_ptr = runtime_->GetAllocator()->MallocDeviceMem(trt_out_tensor_name, outputs_[i].DataSize(), + ConvertDataType(outputs_[i].DataType())); + if (device_ptr == nullptr) { + MS_LOG(ERROR) << "realloc for outputs tensor device memory failed."; + return RET_ERROR; + } + } + tensor_bindings_[index] = device_ptr; + } + return RET_OK; +} + +int TensorRTSubGraph::PostExecute(std::vector *outputs) { + if (!outputs->empty() && outputs->size() != outputs_.size()) { + MS_LOG(ERROR) << "Graph outputs size " << outputs_.size() << " != execute outputs size " << outputs->size(); + return RET_ERROR; + } + auto has_outputs = !outputs->empty(); + for (size_t i = 0; i < trt_out_tensor_name_.size(); i++) { + const auto &trt_out_tensor_name = trt_out_tensor_name_[i]; + int index = this->engine_->getBindingIndex(trt_out_tensor_name.c_str()); + // actual output tensor dims + auto out_dims = this->trt_context_->getBindingDimensions(index); + std::vector new_shape = lite::ConvertMSShape(out_dims); + // batchsize resize need set new batch size + if (input_batchsize_index_ != -1) { + if (runtime_->GetBatchSize() != new_shape[output_batchsize_index_]) { + new_shape[output_batchsize_index_] = runtime_->GetBatchSize(); + } + } + outputs_[i].SetShape(new_shape); + for (int od = 0; od < out_dims.nbDims; od++) { + MS_LOG(DEBUG) << "out tensor " << trt_out_tensor_name << " dims at " << od << " is " << new_shape[od]; + } + runtime_->GetAllocator()->MarkMemValid(trt_out_tensor_name, true); + if (has_outputs) { + auto &tensor = outputs->at(i); + auto dst_device = tensor.device_address(); + if (dst_device == nullptr || dst_device->GetMutablePtr() == nullptr) { + if (tensor.Size() < outputs_[i].DataSize()) { + MS_LOG(ERROR) << "Parameter output data size " << tensor.Size() + << " cannot less than execute output data size " << outputs_[i].DataSize() + << ", output shape: " << new_shape; + return RET_ERROR; + } + auto host_address = tensor.data_c(); + if (host_address == nullptr) { + MS_LOG(ERROR) << "Specified output device or host address cannot be nullptr"; + return RET_ERROR; + } + int sync_ret = + runtime_->GetAllocator()->SyncMemDeviceToHost(host_address, outputs_[i].DataSize(), trt_out_tensor_name); + if (sync_ret != RET_OK) { + MS_LOG(ERROR) << "sync mem from device to host failed for " << trt_out_tensor_name; + return sync_ret; + } + } + } else { + tensor::Tensor output_tensor(static_cast(outputs_[i].DataType()), new_shape); + int sync_ret = runtime_->GetAllocator()->SyncMemDeviceToHost(&output_tensor, 
trt_out_tensor_name); + if (sync_ret != RET_OK) { + MS_LOG(ERROR) << "sync mem from device to host failed for " << trt_out_tensor_name; + return sync_ret; + } + outputs->push_back(output_tensor); + } + runtime_->GetAllocator()->MarkMemValid(trt_out_tensor_name, false); + } + // make mem invalid, prepare for next execute + for (size_t i = 0; i < inputs_.size(); i++) { + runtime_->GetAllocator()->MarkMemValid(trt_in_tensor_name_[i], false); + } return RET_OK; } @@ -612,63 +720,20 @@ bool TensorRTSubGraph::ValidInputResizeDims(const nvinfer1::Dims &construct_dims } int TensorRTSubGraph::Execute(const std::vector &inputs, std::vector *outputs) { - int ret = ReSizeIfNeed(inputs); + int ret = lite::SetCudaDevice(device_info_); if (ret != RET_OK) { return ret; } - ret = lite::SetCudaDevice(device_info_); + outputs->clear(); + ret = PreExecute(inputs, *outputs); if (ret != RET_OK) { return ret; } - for (size_t i = 0; i < inputs.size(); i++) { - if (runtime_->GetAllocator()->GetMemIsValid(trt_in_tensor_name_[i])) { - MS_LOG(INFO) << "no need memcpy to cuda for input tensor: " << trt_in_tensor_name_[i]; - continue; - } - ret = runtime_->GetAllocator()->SyncMemHostToDevice(inputs[i], trt_in_tensor_name_[i]); - if (ret != RET_OK) { - MS_LOG(ERROR) << "sync mem from host to device failed for " << trt_in_tensor_name_[i]; - return ret; - } - runtime_->GetAllocator()->MarkMemValid(trt_in_tensor_name_[i], true); - } - if (!this->trt_context_->executeV2(tensor_bindings_)) { MS_LOG(ERROR) << "TensorRT execute failed."; return RET_ERROR; } - - for (size_t i = 0; i < trt_out_tensor_name_.size(); i++) { - int index = this->engine_->getBindingIndex(trt_out_tensor_name_[i].c_str()); - // actual output tensor dims - auto out_dims = this->trt_context_->getBindingDimensions(index); - std::vector new_shape = lite::ConvertMSShape(out_dims); - // batchsize resize need set new batch size - if (input_batchsize_index_ != -1) { - if (runtime_->GetBatchSize() != new_shape[output_batchsize_index_]) { - new_shape[output_batchsize_index_] = runtime_->GetBatchSize(); - } - } - for (int od = 0; od < out_dims.nbDims; od++) { - MS_LOG(DEBUG) << "out tensor " << trt_out_tensor_name_[i] << " dims at " << od << " is " << new_shape[od]; - } - tensor::Tensor output_tensor(static_cast(outputs_[i].DataType()), new_shape); - outputs_[i].SetShape(new_shape); - - runtime_->GetAllocator()->MarkMemValid(trt_out_tensor_name_[i], true); - int sync_ret = runtime_->GetAllocator()->SyncMemDeviceToHost(&output_tensor, trt_out_tensor_name_[i]); - if (sync_ret != RET_OK) { - MS_LOG(ERROR) << "sync mem from device to host failed for " << trt_out_tensor_name_[i]; - return sync_ret; - } - runtime_->GetAllocator()->MarkMemValid(trt_out_tensor_name_[i], false); - outputs->push_back(output_tensor); - } - // make mem invalid, prepare for next execute - for (size_t i = 0; i < inputs_.size(); i++) { - runtime_->GetAllocator()->MarkMemValid(trt_in_tensor_name_[i], false); - } - return RET_OK; + return PostExecute(outputs); } ITensorHelper TensorRTSubGraph::FindTensorRTInputs(TensorRTOp *cur_op, const TensorInfo &in_tensor) { diff --git a/mindspore/lite/src/extendrt/delegate/tensorrt/tensorrt_subgraph.h b/mindspore/lite/src/extendrt/delegate/tensorrt/tensorrt_subgraph.h index 3d033a16e40..18542ff5cff 100644 --- a/mindspore/lite/src/extendrt/delegate/tensorrt/tensorrt_subgraph.h +++ b/mindspore/lite/src/extendrt/delegate/tensorrt/tensorrt_subgraph.h @@ -50,9 +50,6 @@ class TensorRTSubGraph { int Execute(const std::vector &inputs, std::vector *outputs); - int 
ReSizeIfNeed(const std::vector &inputs); - int ReSize(const std::vector &inputs); - int BuildTensorRTGraph(); int Init(cudaStream_t stream); @@ -92,6 +89,11 @@ class TensorRTSubGraph { bool ValidInputResizeDims(const nvinfer1::Dims &construct_dims, const std::vector &resize_input_shape); + int PreExecute(const std::vector &inputs, const std::vector &outputs); + int PostExecute(std::vector *outputs); + + int OnNewInputShapes(const std::vector &inputs); + std::string name_; std::vector inputs_; std::vector outputs_; diff --git a/mindspore/lite/src/extendrt/infer_session.cc b/mindspore/lite/src/extendrt/infer_session.cc index e902b4d0dbd..b3375f0cd92 100644 --- a/mindspore/lite/src/extendrt/infer_session.cc +++ b/mindspore/lite/src/extendrt/infer_session.cc @@ -43,15 +43,15 @@ class DefaultInferSession : public InferSession { Status Init(const std::shared_ptr context) override; Status CompileGraph(FuncGraphPtr graph, const void *data = nullptr, size_t size = 0) override; Status RunGraph() override; - Status RunGraph(const std::vector &inputs, std::vector *outputs) override; + Status RunGraph(const std::vector &inputs, std::vector *outputs) override; Status Resize(const std::vector &inputs, const std::vector> &dims) override; - std::vector GetOutputs() override; - std::vector GetInputs() override; + std::vector GetOutputs() override; + std::vector GetInputs() override; std::vector GetOutputNames() override; std::vector GetInputNames() override; - tensor::TensorPtr GetOutputByTensorName(const std::string &tensorName) override; - tensor::TensorPtr GetInputByTensorName(const std::string &name) override; + MutableTensorImplPtr GetOutputByTensorName(const std::string &tensorName) override; + MutableTensorImplPtr GetInputByTensorName(const std::string &name) override; private: KernelGraphUtilsPtr kernel_graph_utils_; @@ -71,20 +71,19 @@ Status DefaultInferSession::CompileGraph(FuncGraphPtr graph, const void *data, s } Status DefaultInferSession::RunGraph() { return kSuccess; } -Status DefaultInferSession::RunGraph(const std::vector &inputs, - std::vector *outputs) { +Status DefaultInferSession::RunGraph(const std::vector &inputs, std::vector *outputs) { return kSuccess; } Status DefaultInferSession::Resize(const std::vector &inputs, const std::vector> &dims) { return kSuccess; } -std::vector DefaultInferSession::GetOutputs() { return std::vector(); } -std::vector DefaultInferSession::GetInputs() { return std::vector(); } +std::vector DefaultInferSession::GetOutputs() { return {}; } +std::vector DefaultInferSession::GetInputs() { return {}; } std::vector DefaultInferSession::GetOutputNames() { return std::vector(); } std::vector DefaultInferSession::GetInputNames() { return std::vector(); } -tensor::TensorPtr DefaultInferSession::GetOutputByTensorName(const std::string &tensorName) { return nullptr; } -tensor::TensorPtr DefaultInferSession::GetInputByTensorName(const std::string &name) { return nullptr; } +MutableTensorImplPtr DefaultInferSession::GetOutputByTensorName(const std::string &tensorName) { return nullptr; } +MutableTensorImplPtr DefaultInferSession::GetInputByTensorName(const std::string &name) { return nullptr; } std::shared_ptr InferSession::CreateSession(const std::shared_ptr context) { HandleGPUContext(context); auto config = SelectSessionArg(context); diff --git a/mindspore/lite/src/extendrt/infer_session.h b/mindspore/lite/src/extendrt/infer_session.h index b83e45af37f..5da8cd53cf8 100644 --- a/mindspore/lite/src/extendrt/infer_session.h +++ 
b/mindspore/lite/src/extendrt/infer_session.h @@ -27,6 +27,8 @@ #include "ir/func_graph.h" #include "backend/graph_compiler/graph_partition.h" #include "extendrt/session/type.h" +#include "common/mutable_tensor_impl.h" +#include "extendrt/utils/kernel_graph_utils.h" namespace mindspore { class InferSession : public std::enable_shared_from_this { @@ -37,16 +39,16 @@ class InferSession : public std::enable_shared_from_this { virtual Status Init(const std::shared_ptr context) = 0; virtual Status CompileGraph(FuncGraphPtr graph, const void *data = nullptr, size_t size = 0) = 0; virtual Status RunGraph() = 0; - virtual Status RunGraph(const std::vector &inputs, std::vector *outputs) = 0; + virtual Status RunGraph(const std::vector &inputs, std::vector *outputs) = 0; virtual Status Resize(const std::vector &inputs, const std::vector> &dims) = 0; - virtual std::vector GetOutputs() = 0; - virtual std::vector GetInputs() = 0; + virtual std::vector GetOutputs() = 0; + virtual std::vector GetInputs() = 0; virtual std::vector GetOutputNames() = 0; virtual std::vector GetInputNames() = 0; - virtual tensor::TensorPtr GetOutputByTensorName(const std::string &tensorName) = 0; - virtual tensor::TensorPtr GetInputByTensorName(const std::string &name) = 0; + virtual MutableTensorImplPtr GetOutputByTensorName(const std::string &tensorName) = 0; + virtual MutableTensorImplPtr GetInputByTensorName(const std::string &name) = 0; protected: FuncGraphPtr graph_; diff --git a/mindspore/lite/src/extendrt/session/delegate_session.cc b/mindspore/lite/src/extendrt/session/delegate_session.cc index a092b6e5094..63f26bc1977 100644 --- a/mindspore/lite/src/extendrt/session/delegate_session.cc +++ b/mindspore/lite/src/extendrt/session/delegate_session.cc @@ -28,20 +28,19 @@ Status DelegateSession::Init(const std::shared_ptr context) { return kS Status DelegateSession::CompileGraph(FuncGraphPtr graph, const void *data, size_t size) { return kSuccess; } Status DelegateSession::RunGraph() { return kSuccess; } -Status DelegateSession::RunGraph(const std::vector &inputs, - std::vector *outputs) { +Status DelegateSession::RunGraph(const std::vector &inputs, std::vector *outputs) { return kSuccess; } Status DelegateSession::Resize(const std::vector &inputs, const std::vector> &dims) { return kSuccess; } -std::vector DelegateSession::GetOutputs() { return std::vector(); } -std::vector DelegateSession::GetInputs() { return std::vector(); } +std::vector DelegateSession::GetOutputs() { return {}; } +std::vector DelegateSession::GetInputs() { return {}; } std::vector DelegateSession::GetOutputNames() { return std::vector(); } std::vector DelegateSession::GetInputNames() { return std::vector(); } -tensor::TensorPtr DelegateSession::GetOutputByTensorName(const std::string &tensorName) { return nullptr; } -tensor::TensorPtr DelegateSession::GetInputByTensorName(const std::string &name) { return nullptr; } +MutableTensorImplPtr DelegateSession::GetOutputByTensorName(const std::string &tensorName) { return nullptr; } +MutableTensorImplPtr DelegateSession::GetInputByTensorName(const std::string &name) { return nullptr; } static std::shared_ptr DelegateSessionCreator(const SessionConfig &config) { auto delegates = config.delegates_; diff --git a/mindspore/lite/src/extendrt/session/delegate_session.h b/mindspore/lite/src/extendrt/session/delegate_session.h index 7053c91cb87..1f9d56a8ca9 100644 --- a/mindspore/lite/src/extendrt/session/delegate_session.h +++ b/mindspore/lite/src/extendrt/session/delegate_session.h @@ -32,15 +32,15 @@ class 
DelegateSession : public InferSession { Status Init(const std::shared_ptr context) override; Status CompileGraph(FuncGraphPtr graph, const void *data = nullptr, size_t size = 0) override; Status RunGraph() override; - Status RunGraph(const std::vector &inputs, std::vector *outputs) override; + Status RunGraph(const std::vector &inputs, std::vector *outputs) override; Status Resize(const std::vector &inputs, const std::vector> &dims) override; - std::vector GetOutputs() override; - std::vector GetInputs() override; + std::vector GetOutputs() override; + std::vector GetInputs() override; std::vector GetOutputNames() override; std::vector GetInputNames() override; - tensor::TensorPtr GetOutputByTensorName(const std::string &tensorName) override; - tensor::TensorPtr GetInputByTensorName(const std::string &name) override; + MutableTensorImplPtr GetOutputByTensorName(const std::string &tensorName) override; + MutableTensorImplPtr GetInputByTensorName(const std::string &name) override; private: std::shared_ptr delegate_; diff --git a/mindspore/lite/src/extendrt/session/graph_executor_session.cc b/mindspore/lite/src/extendrt/session/graph_executor_session.cc index b95beac3dd8..0099e38efd3 100644 --- a/mindspore/lite/src/extendrt/session/graph_executor_session.cc +++ b/mindspore/lite/src/extendrt/session/graph_executor_session.cc @@ -19,8 +19,8 @@ #include #include "extendrt/session/graph_executor_session.h" -#include "extendrt/utils/tensor_utils.h" #include "src/extendrt/utils/kernel_build_utils.h" +#include "extendrt/utils/tensor_default_impl.h" namespace mindspore { Status GraphExecutorSession::Init(const std::shared_ptr context) { @@ -38,39 +38,57 @@ Status GraphExecutorSession::CompileGraph(FuncGraphPtr graph, const void *data, for (const auto &kernel_node : kernel_nodes) { mindspore::infer::SetKernelInfo(kernel_node); } - if (graph_executor_->CompileGraph(kernel_graph_, options_)) { - kernel_graph_utils_->GetModelInputsInfo(kernel_graph_->graph_id(), &inputs_, &input_names_); - kernel_graph_utils_->GetModelOutputsInfo(kernel_graph_->graph_id(), &outputs_, &output_names_); - return kSuccess; + if (!graph_executor_->CompileGraph(kernel_graph_, options_)) { + return kCoreFailed; } - return kCoreFailed; + std::vector graph_inputs, graph_outputs; + kernel_graph_utils_->GetModelInputsInfo(kernel_graph_->graph_id(), &graph_inputs, &input_names_); + kernel_graph_utils_->GetModelOutputsInfo(kernel_graph_->graph_id(), &graph_outputs, &output_names_); + if (graph_inputs.size() != input_names_.size()) { + MS_LOG(ERROR) << "Graph input size " << graph_inputs.size() << " != input names size " << input_names_.size(); + return kCoreFailed; + } + if (graph_outputs.size() != output_names_.size()) { + MS_LOG(ERROR) << "Graph output size " << graph_outputs.size() << " != output names size " << output_names_.size(); + return kCoreFailed; + } + for (size_t i = 0; i < input_names_.size(); i++) { + auto &input = graph_inputs[i]; + auto data_type = static_cast(input->data_type()); + auto impl = std::make_shared(input_names_[i], data_type, input->shape_c()); + inputs_.push_back(impl); + } + for (size_t i = 0; i < output_names_.size(); i++) { + auto &output = graph_outputs[i]; + auto data_type = static_cast(output->data_type()); + auto impl = std::make_shared(output_names_[i], data_type, output->shape_c()); + outputs_.push_back(impl); + } + return kSuccess; } Status GraphExecutorSession::RunGraph() { return kSuccess; } -Status GraphExecutorSession::RunGraph(const std::vector &inputs, - std::vector *outputs) { + +Status 
GraphExecutorSession::RunGraph(const std::vector &inputs, std::vector *outputs) { MS_LOG(INFO) << "GraphExecutorSession::RunGraph"; + MS_EXCEPTION_IF_NULL(graph_executor_); MS_EXCEPTION_IF_NULL(outputs); - std::vector executor_inputs, executor_outputs; - executor_inputs = TensorUtils::TensorPtrToTensor(inputs); - auto ret = graph_executor_->RunGraph(kernel_graph_, executor_inputs, &executor_outputs, options_); + auto ret = graph_executor_->RunGraph(kernel_graph_, inputs, outputs, options_); if (!ret) { return kCoreFailed; } - *outputs = TensorUtils::TensorToTensorPtr(executor_outputs); - inputs_ = inputs; - outputs_ = *outputs; return kSuccess; } + Status GraphExecutorSession::Resize(const std::vector &inputs, const std::vector> &dims) { return kSuccess; } -std::vector GraphExecutorSession::GetOutputs() { return outputs_; } -std::vector GraphExecutorSession::GetInputs() { return inputs_; } +std::vector GraphExecutorSession::GetOutputs() { return outputs_; } +std::vector GraphExecutorSession::GetInputs() { return inputs_; } std::vector GraphExecutorSession::GetOutputNames() { return output_names_; } std::vector GraphExecutorSession::GetInputNames() { return input_names_; } -tensor::TensorPtr GraphExecutorSession::GetOutputByTensorName(const std::string &tensorName) { +MutableTensorImplPtr GraphExecutorSession::GetOutputByTensorName(const std::string &tensorName) { for (size_t i = 0; i < output_names_.size(); i++) { if (output_names_[i] == tensorName) { return outputs_[i]; @@ -78,7 +96,7 @@ tensor::TensorPtr GraphExecutorSession::GetOutputByTensorName(const std::string } return nullptr; } -tensor::TensorPtr GraphExecutorSession::GetInputByTensorName(const std::string &name) { +MutableTensorImplPtr GraphExecutorSession::GetInputByTensorName(const std::string &name) { for (size_t i = 0; i < input_names_.size(); i++) { if (input_names_[i] == name) { return inputs_[i]; diff --git a/mindspore/lite/src/extendrt/session/graph_executor_session.h b/mindspore/lite/src/extendrt/session/graph_executor_session.h index e589e2894e1..86124183ded 100644 --- a/mindspore/lite/src/extendrt/session/graph_executor_session.h +++ b/mindspore/lite/src/extendrt/session/graph_executor_session.h @@ -36,24 +36,24 @@ class GraphExecutorSession : public DelegateSession { Status Init(const std::shared_ptr context) override; Status CompileGraph(FuncGraphPtr graph, const void *data = nullptr, size_t size = 0) override; Status RunGraph() override; - Status RunGraph(const std::vector &inputs, std::vector *outputs) override; + Status RunGraph(const std::vector &inputs, std::vector *outputs) override; Status Resize(const std::vector &inputs, const std::vector> &dims) override; - std::vector GetOutputs() override; - std::vector GetInputs() override; + std::vector GetOutputs() override; + std::vector GetInputs() override; std::vector GetOutputNames() override; std::vector GetInputNames() override; - tensor::TensorPtr GetOutputByTensorName(const std::string &tensorName) override; - tensor::TensorPtr GetInputByTensorName(const std::string &name) override; + MutableTensorImplPtr GetOutputByTensorName(const std::string &tensorName) override; + MutableTensorImplPtr GetInputByTensorName(const std::string &name) override; private: std::shared_ptr graph_executor_; std::map options_; KernelGraphUtilsPtr kernel_graph_utils_; KernelGraphPtr kernel_graph_; - std::vector inputs_; + std::vector inputs_; std::vector input_names_; - std::vector outputs_; + std::vector outputs_; std::vector output_names_; }; } // namespace mindspore diff --git 
a/mindspore/lite/src/extendrt/session/lite_infer_session.cc b/mindspore/lite/src/extendrt/session/lite_infer_session.cc index 11ce4b1b0d5..d3a361009e7 100644 --- a/mindspore/lite/src/extendrt/session/lite_infer_session.cc +++ b/mindspore/lite/src/extendrt/session/lite_infer_session.cc @@ -129,8 +129,7 @@ Status LiteInferSession::RunGraph() { auto ret = lite_session_->RunGraph(); return static_cast<StatusCode>(ret); } -Status LiteInferSession::RunGraph(const std::vector<tensor::TensorPtr> &inputs, - std::vector<tensor::TensorPtr> *outputs) { +Status LiteInferSession::RunGraph(const std::vector<tensor::Tensor> &inputs, std::vector<tensor::Tensor> *outputs) { MS_LOG(INFO) << "LiteInferSession::RunGraph with input and outputs"; MS_EXCEPTION_IF_NULL(outputs); MS_EXCEPTION_IF_NULL(lite_session_); @@ -145,7 +144,7 @@ Status LiteInferSession::RunGraph(const std::vector<tensor::TensorPtr> &inputs, std::vector<void *> old_data; for (size_t i = 0; i < inputs.size(); i++) { auto input = input_tensors.at(i); - auto user_input = inputs.at(i); + auto user_input = &inputs[i]; if (user_input->data_type() != input->data_type()) { ResetTensorData(old_data, input_tensors); MS_LOG(EXCEPTION) << "Tensor " << user_input->id() << " has a different data type from input" @@ -200,7 +199,7 @@ Status LiteInferSession::RunGraph(const std::vector<tensor::TensorPtr> &inputs, return kLiteError; } outputs->clear(); - *outputs = TensorUtils::MSTensorToTensorPtr(res); + *outputs = TensorUtils::MSTensorToTensor(res); return kSuccess; } Status LiteInferSession::Resize(const std::vector<tensor::Tensor> &inputs, @@ -208,37 +207,23 @@ Status LiteInferSession::Resize(const std::vector<tensor::Tensor> &inputs, return kSuccess; } -std::vector<tensor::TensorPtr> LiteInferSession::GetOutputs() { +std::vector<MutableTensorImplPtr> LiteInferSession::GetOutputs() { auto outputs = lite_session_->GetOutputs(); - std::vector<tensor::TensorPtr> output_tensors; + std::vector<MutableTensorImplPtr> output_tensors; for (auto &iter : outputs) { auto output = iter.second; - auto type_id = output->data_type(); - auto shape = output->shape(); - ShapeVector shape_vec; - std::transform(shape.begin(), shape.end(), std::back_inserter(shape_vec), - [](int s) { return static_cast<int64_t>(s); }); - auto data = output->data(); - auto data_size = output->Size(); - auto tensor_ptr = std::make_shared<tensor::Tensor>(type_id, shape_vec, data, data_size); - output_tensors.emplace_back(tensor_ptr); + auto impl = std::make_shared<LiteTensorImpl>(output); + output_tensors.emplace_back(impl); } return output_tensors; } -std::vector<tensor::TensorPtr> LiteInferSession::GetInputs() { +std::vector<MutableTensorImplPtr> LiteInferSession::GetInputs() { auto inputs = lite_session_->GetInputs(); - std::vector<tensor::TensorPtr> input_tensors; + std::vector<MutableTensorImplPtr> input_tensors; for (auto &input : inputs) { - auto type_id = input->data_type(); - auto shape = input->shape(); - ShapeVector shape_vec; - std::transform(shape.begin(), shape.end(), std::back_inserter(shape_vec), - [](int s) { return static_cast<int64_t>(s); }); - auto data = input->data(); - auto data_size = input->Size(); - auto tensor_ptr = std::make_shared<tensor::Tensor>(type_id, shape_vec, data, data_size); - input_tensors.emplace_back(tensor_ptr); + auto impl = std::make_shared<LiteTensorImpl>(input); + input_tensors.emplace_back(impl); } return input_tensors; } @@ -252,8 +237,26 @@ std::vector<std::string> LiteInferSession::GetOutputNames() { } std::vector<std::string> LiteInferSession::GetInputNames() { return ConvertToTensorNames(lite_session_->GetInputs()); } -tensor::TensorPtr LiteInferSession::GetOutputByTensorName(const std::string &tensorName) { return nullptr; } -tensor::TensorPtr LiteInferSession::GetInputByTensorName(const std::string &name) { return nullptr; } +MutableTensorImplPtr LiteInferSession::GetOutputByTensorName(const std::string &name) { + auto outputs = lite_session_->GetOutputs(); + for 
(auto &iter : outputs) { + auto output = iter.second; + if (output->tensor_name() == name) { + return std::make_shared(output); + } + } + return nullptr; +} + +MutableTensorImplPtr LiteInferSession::GetInputByTensorName(const std::string &name) { + auto inputs = lite_session_->GetInputs(); + for (auto &input : inputs) { + if (input->tensor_name() == name) { + return std::make_shared(input); + } + } + return nullptr; +} std::shared_ptr LiteInferSession::CreateLiteSession(lite::InnerContext *context) { auto session = std::make_shared(); diff --git a/mindspore/lite/src/extendrt/session/lite_infer_session.h b/mindspore/lite/src/extendrt/session/lite_infer_session.h index a1fbe6a7009..e6a8a0e750f 100644 --- a/mindspore/lite/src/extendrt/session/lite_infer_session.h +++ b/mindspore/lite/src/extendrt/session/lite_infer_session.h @@ -32,15 +32,15 @@ class LiteInferSession : public InferSession { Status Init(const std::shared_ptr context) override; Status CompileGraph(FuncGraphPtr graph, const void *data = nullptr, size_t size = 0) override; Status RunGraph() override; - Status RunGraph(const std::vector &inputs, std::vector *outputs) override; + Status RunGraph(const std::vector &inputs, std::vector *outputs) override; Status Resize(const std::vector &inputs, const std::vector> &dims) override; - std::vector GetOutputs() override; - std::vector GetInputs() override; + std::vector GetOutputs() override; + std::vector GetInputs() override; std::vector GetOutputNames() override; std::vector GetInputNames() override; - tensor::TensorPtr GetOutputByTensorName(const std::string &tensorName) override; - tensor::TensorPtr GetInputByTensorName(const std::string &name) override; + MutableTensorImplPtr GetOutputByTensorName(const std::string &tensorName) override; + MutableTensorImplPtr GetInputByTensorName(const std::string &name) override; private: std::shared_ptr CreateLiteSession(lite::InnerContext *context); @@ -54,10 +54,6 @@ class LiteInferSession : public InferSession { private: std::shared_ptr lite_session_; std::shared_ptr context_; - std::vector inputs_; - std::vector input_names_; - std::vector outputs_; - std::vector output_names_; }; } // namespace mindspore diff --git a/mindspore/lite/src/extendrt/single_op_session.cc b/mindspore/lite/src/extendrt/single_op_session.cc index 3b4236a7aac..e9d894cc95b 100644 --- a/mindspore/lite/src/extendrt/single_op_session.cc +++ b/mindspore/lite/src/extendrt/single_op_session.cc @@ -31,6 +31,7 @@ #include "src/extendrt/kernel/ascend/plugin/ascend_kernel_plugin.h" #include "extendrt/session/factory.h" #include "extendrt/utils/runtime_utils.h" +#include "extendrt/utils/tensor_default_impl.h" namespace mindspore { const size_t tensor_max_size = 0x1000000; @@ -131,15 +132,34 @@ Status SingleOpInferSession::CompileGraph(FuncGraphPtr graph, const void *data, RuntimeUtils::AssignKernelGraphAddress(kernel_graph_); - kernel_graph_utils_->GetModelInputsInfo(kernel_graph_->graph_id(), &inputs_, &input_names_); - kernel_graph_utils_->GetModelOutputsInfo(kernel_graph_->graph_id(), &outputs_, &output_names_); - + std::vector graph_inputs, graph_outputs; + kernel_graph_utils_->GetModelInputsInfo(kernel_graph_->graph_id(), &graph_inputs, &input_names_); + kernel_graph_utils_->GetModelOutputsInfo(kernel_graph_->graph_id(), &graph_outputs, &output_names_); + if (graph_inputs.size() != input_names_.size()) { + MS_LOG(ERROR) << "Graph input size " << graph_inputs.size() << " != input names size " << input_names_.size(); + return kCoreFailed; + } + if (graph_outputs.size() != 
output_names_.size()) { + MS_LOG(ERROR) << "Graph output size " << graph_outputs.size() << " != output names size " << output_names_.size(); + return kCoreFailed; + } + for (size_t i = 0; i < input_names_.size(); i++) { + auto &input = graph_inputs[i]; + auto data_type = static_cast(input->data_type()); + auto impl = std::make_shared(input_names_[i], data_type, input->shape_c()); + inputs_.push_back(impl); + } + for (size_t i = 0; i < output_names_.size(); i++) { + auto &output = graph_outputs[i]; + auto data_type = static_cast(output->data_type()); + auto impl = std::make_shared(output_names_[i], data_type, output->shape_c()); + outputs_.push_back(impl); + } return kSuccess; } Status SingleOpInferSession::RunGraph() { return kSuccess; } -Status SingleOpInferSession::RunGraph(const std::vector &inputs, - std::vector *outputs) { +Status SingleOpInferSession::RunGraph(const std::vector &inputs, std::vector *outputs) { MS_LOG(INFO) << "SingleOpInferSession::RunGraph with input and outputs"; MS_EXCEPTION_IF_NULL(kernel_graph_); @@ -179,8 +199,6 @@ Status SingleOpInferSession::RunGraph(const std::vector &inpu } RuntimeUtils::CopyOutputTensorsFromKernelGraph(outputs, kernel_graph_); - outputs_ = *outputs; - return kSuccess; } @@ -217,7 +235,7 @@ Status SingleOpInferSession::ResizeGraphInputs(const std::vectorSetSize(tensor_size); } // update input shape - inputs_[i]->set_shape(dims[i]); + inputs_[i]->SetShape(dims[i]); auto abstract = std::make_shared(TypeIdToType(type_id), dims[i]); if (abstract == nullptr) { MS_LOG(ERROR) << "Abstract is nullptr."; @@ -248,13 +266,12 @@ Status SingleOpInferSession::Resize(const std::vector &inputs } return kSuccess; } - -std::vector SingleOpInferSession::GetOutputs() { return outputs_; } -std::vector SingleOpInferSession::GetInputs() { return inputs_; } +std::vector SingleOpInferSession::GetOutputs() { return outputs_; } +std::vector SingleOpInferSession::GetInputs() { return inputs_; } std::vector SingleOpInferSession::GetOutputNames() { return output_names_; } std::vector SingleOpInferSession::GetInputNames() { return input_names_; } -tensor::TensorPtr SingleOpInferSession::GetOutputByTensorName(const std::string &tensor_name) { +MutableTensorImplPtr SingleOpInferSession::GetOutputByTensorName(const std::string &tensor_name) { for (size_t idx = 0; idx < output_names_.size(); ++idx) { if (output_names_[idx] == tensor_name) { if (idx < outputs_.size()) { @@ -266,7 +283,7 @@ tensor::TensorPtr SingleOpInferSession::GetOutputByTensorName(const std::string return nullptr; } -tensor::TensorPtr SingleOpInferSession::GetInputByTensorName(const std::string &tensor_name) { +MutableTensorImplPtr SingleOpInferSession::GetInputByTensorName(const std::string &tensor_name) { for (size_t idx = 0; idx < input_names_.size(); ++idx) { if (input_names_[idx] == tensor_name) { if (idx < inputs_.size()) { diff --git a/mindspore/lite/src/extendrt/single_op_session.h b/mindspore/lite/src/extendrt/single_op_session.h index 93941419ea8..f1fc5598ad0 100644 --- a/mindspore/lite/src/extendrt/single_op_session.h +++ b/mindspore/lite/src/extendrt/single_op_session.h @@ -32,24 +32,24 @@ class SingleOpInferSession : public InferSession { Status AscendInit(const std::shared_ptr &context); Status CompileGraph(FuncGraphPtr graph, const void *data = nullptr, size_t size = 0) override; Status RunGraph() override; - Status RunGraph(const std::vector &inputs, std::vector *outputs) override; + Status RunGraph(const std::vector &inputs, std::vector *outputs) override; Status Resize(const std::vector 
&inputs, const std::vector> &dims) override; - std::vector GetOutputs() override; - std::vector GetInputs() override; + std::vector GetOutputs() override; + std::vector GetInputs() override; std::vector GetOutputNames() override; std::vector GetInputNames() override; - tensor::TensorPtr GetOutputByTensorName(const std::string &tensorName) override; - tensor::TensorPtr GetInputByTensorName(const std::string &name) override; + MutableTensorImplPtr GetOutputByTensorName(const std::string &tensorName) override; + MutableTensorImplPtr GetInputByTensorName(const std::string &name) override; private: Status ResizeGraphInputs(const std::vector &inputs, const std::vector> &dims); KernelGraphUtilsPtr kernel_graph_utils_; KernelGraphPtr kernel_graph_; - std::vector inputs_; + std::vector inputs_; std::vector input_names_; - std::vector outputs_; + std::vector outputs_; std::vector output_names_; uint32_t device_id_ = 0; }; diff --git a/mindspore/lite/src/extendrt/utils/kernel_graph_utils.cc b/mindspore/lite/src/extendrt/utils/kernel_graph_utils.cc index c52db1e335b..1470f67b0ba 100644 --- a/mindspore/lite/src/extendrt/utils/kernel_graph_utils.cc +++ b/mindspore/lite/src/extendrt/utils/kernel_graph_utils.cc @@ -914,9 +914,10 @@ void KernelGraphUtils::GetModelInputsInfo(uint32_t graph_id, std::vectorGetOutputDeviceType(0); auto ms_tensor = std::make_shared(data_type, input_shape); - inputs->push_back(ms_tensor); auto abstract = parameter->abstract(); MS_EXCEPTION_IF_NULL(abstract); + ms_tensor->set_name(abstract->name()); + inputs->push_back(ms_tensor); inputs_name->push_back(abstract->name()); } } @@ -972,6 +973,12 @@ void KernelGraphUtils::GetModelOutputsInfo(uint32_t graph_id, std::vectorsize() != output_names->size()) { + MS_LOG_EXCEPTION << "Output tensor size " << outputs->size() << " != output name size " << output_names->size(); + } + for (size_t i = 0; i < outputs->size(); i++) { + outputs->at(i)->set_name(output_names->at(i)); + } } CNodePtr KernelGraphUtils::CreateNewCNode(const CNodePtr &cnode, KernelGraphPtr graph, diff --git a/mindspore/lite/src/extendrt/utils/runtime_utils.cc b/mindspore/lite/src/extendrt/utils/runtime_utils.cc index 210873cc72f..6b1d84f2433 100644 --- a/mindspore/lite/src/extendrt/utils/runtime_utils.cc +++ b/mindspore/lite/src/extendrt/utils/runtime_utils.cc @@ -64,7 +64,7 @@ std::vector RuntimeUtils::GetGraphDataInputs(const KernelGraphPtr &k return data_inputs; } -void RuntimeUtils::CopyInputTensorsToKernelGraph(const std::vector &inputs, +void RuntimeUtils::CopyInputTensorsToKernelGraph(const std::vector &inputs, KernelGraphPtr kernel_graph) { MS_EXCEPTION_IF_NULL(kernel_graph); auto graph_inputs = GetGraphDataInputs(kernel_graph); @@ -74,20 +74,20 @@ void RuntimeUtils::CopyInputTensorsToKernelGraph(const std::vectorptr_ == nullptr) { MS_LOG(EXCEPTION) << "Output_idx" << i << " of input " << graph_input->DebugString() << " output addr ptr is nullptr."; } - memcpy(graph_input_addr->ptr_, input->data_c(), graph_input_addr->size_); + memcpy(graph_input_addr->ptr_, input.data_c(), graph_input_addr->size_); } } -void RuntimeUtils::CopyOutputTensorsFromKernelGraph(std::vector *outputs, - KernelGraphPtr kernel_graph) { +void RuntimeUtils::CopyOutputTensorsFromKernelGraph(std::vector *outputs, KernelGraphPtr kernel_graph) { MS_EXCEPTION_IF_NULL(kernel_graph); + outputs->clear(); auto graph_outputs = kernel_graph->outputs(); for (auto graph_output : graph_outputs) { auto real_output_with_index = common::AnfAlgo::VisitKernelWithReturnType(graph_output, 0); @@ -104,8 +104,7 @@ 
void RuntimeUtils::CopyOutputTensorsFromKernelGraph(std::vector(us); shape.push_back(s); } - auto tensor_ptr = std::make_shared(type_id, shape, data, data_size); - outputs->push_back(tensor_ptr); + outputs->emplace_back(mindspore::tensor::Tensor(type_id, shape, data, data_size)); } } diff --git a/mindspore/lite/src/extendrt/utils/runtime_utils.h b/mindspore/lite/src/extendrt/utils/runtime_utils.h index 49c1d8cb30f..a5a316c14a3 100644 --- a/mindspore/lite/src/extendrt/utils/runtime_utils.h +++ b/mindspore/lite/src/extendrt/utils/runtime_utils.h @@ -37,8 +37,8 @@ class RuntimeUtils { static kernel::AddressPtr GetAddressFromDevice(device::DeviceAddressPtr address_ptr); static std::vector GetGraphDataInputs(const KernelGraphPtr &kernel_graph); - static void CopyInputTensorsToKernelGraph(const std::vector &inputs, KernelGraphPtr kernel_graph); - static void CopyOutputTensorsFromKernelGraph(std::vector *outputs, KernelGraphPtr kernel_graph); + static void CopyInputTensorsToKernelGraph(const std::vector &inputs, KernelGraphPtr kernel_graph); + static void CopyOutputTensorsFromKernelGraph(std::vector *outputs, KernelGraphPtr kernel_graph); static void AssignKernelGraphAddress(KernelGraphPtr kernel_graph); static void AssignValueNodeAddress(KernelGraphPtr kernel_graph); diff --git a/mindspore/lite/src/extendrt/utils/tensor_default_impl.h b/mindspore/lite/src/extendrt/utils/tensor_default_impl.h new file mode 100644 index 00000000000..69867afb72e --- /dev/null +++ b/mindspore/lite/src/extendrt/utils/tensor_default_impl.h @@ -0,0 +1,141 @@ +/** + * This is the C++ adaptation and derivative work of Myia (https://github.com/mila-iqia/myia/). + * + * Copyright 2019-2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef MINDSPORE_LITE_SRC_EXTENDRT_UTILS_TENSOR_DEFAULT_IMPL_H_ +#define MINDSPORE_LITE_SRC_EXTENDRT_UTILS_TENSOR_DEFAULT_IMPL_H_ + +#include +#include +#include +#include + +#include "include/api/types.h" +#include "ir/tensor.h" +#include "runtime/device/device_address.h" +#include "common/utils.h" +#include "common/mutable_tensor_impl.h" + +namespace mindspore { +class TensorDefaultImpl : public MutableTensorImpl { + public: + TensorDefaultImpl() = default; + TensorDefaultImpl(const std::string &name, enum DataType type, const std::vector &shape) + : name_(name), type_(type), shape_(shape) { + buffer_.SetData(nullptr, 0); + data_ = buffer_.Data(); + } + + TensorDefaultImpl(const std::string &name, enum DataType type, const std::vector &shape, const void *data, + size_t data_len, bool ref_data, bool own_data) + : name_(name), type_(type), shape_(shape) { + if (ref_data) { + data_ = data; + own_data_ = own_data; + } else { + if (data == nullptr) { + data_len = 0; + } + buffer_.SetData(data, data_len); + data_ = buffer_.Data(); + } + } + ~TensorDefaultImpl() { + if (own_data_ && data_ != nullptr && data_ != buffer_.Data()) { + free(const_cast(data_)); + } + } + void SetShape(const std::vector &shape) override { shape_ = shape; } + void SetDataType(mindspore::DataType data_type) override { type_ = data_type; } + void SetName(const std::string &name) override { name_ = name; } + + mindspore::Format Format() const override { return format_; } + void SetFormat(mindspore::Format format) override { format_ = format; } + + const std::string &Name() const override { return name_; } + enum DataType DataType() const override { return type_; } + const std::vector &Shape() const override { return shape_; } + + void SetAllocator(const std::shared_ptr &allocator) override { allocator_ = allocator; } + std::shared_ptr GetAllocator() const override { return allocator_; } + + std::vector GetQuantParams() const override { return quant_param_; } + void SetQuantParams(const std::vector &quant_param) override { quant_param_ = quant_param; } + + int64_t ElementNum() const { return std::accumulate(shape_.begin(), shape_.end(), 1, std::multiplies()); } + size_t DataSize() const override { return ElementNum() * lite::DataTypeSize(static_cast(type_)); } + + void SetDeviceData(void *data) override { device_data_ = data; } + void *GetDeviceData() override { return device_data_; } + bool IsConst() const override { return false; } + + bool IsDevice() const override { return device_data_ != nullptr; } + + std::shared_ptr Data() const override { + ResizeData(); + return std::shared_ptr(data_, [](const void *) {}); + } + + void SetData(void *data, bool own_data) override { + data_ = data; + own_data_ = own_data; + } + + void *MutableData() override { + ResizeData(); + return const_cast(data_); + } + + std::shared_ptr Clone() const override { + auto impl = std::make_shared(name_, type_, shape_, data_, DataSize(), false, false); + if (!impl) { + return nullptr; + } + impl->SetFormat(format_); + impl->SetQuantParams(quant_param_); + impl->SetDeviceData(device_data_); + impl->SetAllocator(allocator_); + return impl; + } + + protected: + std::string name_; + enum DataType type_ = DataType::kTypeUnknown; + enum Format format_ = mindspore::NCHW; + std::vector shape_; + std::shared_ptr allocator_ = nullptr; + std::vector quant_param_; + void *device_data_ = nullptr; + + mutable Buffer buffer_; + mutable const void *data_ = nullptr; + bool own_data_ = false; + + void ResizeData() const { + if (data_ != nullptr && data_ != 
buffer_.Data()) { + return; + } + auto data_size = DataSize(); + if (data_size != buffer_.DataSize()) { + buffer_.ResizeData(data_size); + } + data_ = buffer_.Data(); + } +}; +} // namespace mindspore + +#endif // MINDSPORE_LITE_SRC_EXTENDRT_UTILS_TENSOR_DEFAULT_IMPL_H_ diff --git a/mindspore/lite/src/extendrt/utils/tensor_utils.cc b/mindspore/lite/src/extendrt/utils/tensor_utils.cc index b2faee6c183..40a2806401a 100644 --- a/mindspore/lite/src/extendrt/utils/tensor_utils.cc +++ b/mindspore/lite/src/extendrt/utils/tensor_utils.cc @@ -18,10 +18,57 @@ #include #include +#include #include "extendrt/utils/tensor_utils.h" +#include "mindspore/ccsrc/kernel/common_utils.h" namespace mindspore { +TensorRefData::TensorRefData(void *data, size_t elem_count, size_t data_size, size_t ndim) + : data_(data), elem_count_(elem_count), data_size_(data_size), ndim_(ndim) {} + +ssize_t TensorRefData::size() const { return static_cast<ssize_t>(elem_count_); } + +ssize_t TensorRefData::itemsize() const { + if (elem_count_ == 0) { + return 0; + } + return static_cast<ssize_t>(data_size_ / elem_count_); +} + +ssize_t TensorRefData::nbytes() const { return static_cast<ssize_t>(data_size_); } + +ssize_t TensorRefData::ndim() const { return static_cast<ssize_t>(ndim_); } + +void *TensorRefData::data() { return data_; } + +const void *TensorRefData::const_data() const { return data_; } + +std::string TensorRefData::ToString(TypeId type, const ShapeVector &shape, bool use_comma) const { + std::stringstream stream; + stream << "RefTensor:["; + for (size_t i = 0; i < shape.size(); i++) { + stream << shape[i]; + if (i + 1 < shape.size()) { + stream << ","; + } + } + stream << "]" << type; + return stream.str(); } + +mindspore::Format TensorTensorImpl::Format() const { + MS_EXCEPTION_IF_NULL(tensor_); + return kernel::GetFormatFromStrToEnum(tensor_->device_info().format_); } + +void TensorTensorImpl::SetFormat(mindspore::Format format) { + MS_EXCEPTION_IF_NULL(tensor_); + auto device_info = tensor_->device_info(); + device_info.format_ = kernel::GetFormatFromEnumToStr(format); + tensor_->set_device_info(device_info); } + std::vector<tensor::TensorPtr> TensorUtils::MSTensorToTensorPtr(const std::vector<MSTensor> &ms_tensors) { std::vector<tensor::TensorPtr> tensor_ptrs; @@ -31,7 +78,8 @@ std::vector<tensor::TensorPtr> TensorUtils::MSTensorToTensorPtr(const auto shape = ms_tensor.Shape(); auto data = ms_tensor.MutableData(); auto data_size = ms_tensor.DataSize(); - auto tensor_ptr = std::make_shared<tensor::Tensor>(type_id, shape, data, data_size); + auto ref_tensor_data = std::make_shared<TensorRefData>(data, ms_tensor.ElementNum(), data_size, shape.size()); + auto tensor_ptr = std::make_shared<tensor::Tensor>(type_id, shape, ref_tensor_data); tensor_ptrs.push_back(tensor_ptr); } return tensor_ptrs; } @@ -40,22 +88,46 @@ std::vector<tensor::TensorPtr> TensorUtils::MSTensorToTensorPtr(const std::vector<MSTensor> TensorUtils::TensorPtrToMSTensor(std::vector<tensor::TensorPtr> tensor_ptrs, const std::vector<std::string> &tensor_names) { std::vector<MSTensor> ms_tensors; - for (size_t i = 0; i < tensor_ptrs.size(); i++) { auto graph_tensor = tensor_ptrs[i]; std::string graph_tensor_name = tensor_names[i]; - auto type_id = graph_tensor->data_type_c(); - auto data_type = static_cast<enum DataType>(type_id); - auto ms_tensor_ptr = MSTensor::CreateRefTensor(graph_tensor_name, data_type, graph_tensor->shape_c(), - graph_tensor->data_c(), graph_tensor->Size()); - if (ms_tensor_ptr == nullptr) { - MS_LOG_WARNING << "Failed to create input tensor "; - return {}; - } - ms_tensors.push_back(*ms_tensor_ptr); - delete ms_tensor_ptr; + graph_tensor->set_name(graph_tensor_name); + auto tensor_impl = std::make_shared<TensorTensorImpl>(graph_tensor); + 
ms_tensors.push_back(MSTensor(tensor_impl)); } + return ms_tensors; +} +std::vector TensorUtils::MSTensorToTensor(const std::vector &ms_tensors) { + std::vector tensors; + for (auto ms_tensor : ms_tensors) { + auto data_type = ms_tensor.DataType(); + auto type_id = static_cast(data_type); + auto shape = ms_tensor.Shape(); + auto data = ms_tensor.MutableData(); + auto data_size = ms_tensor.DataSize(); + auto ref_tensor_data = std::make_shared(data, ms_tensor.ElementNum(), data_size, shape.size()); + mindspore::tensor::Tensor tensor(type_id, shape, ref_tensor_data); + auto device_address = ms_tensor.GetDeviceData(); + if (device_address != nullptr) { + auto lite_device_address = std::make_shared(device_address, ms_tensor.DataSize()); + tensor.set_device_address(lite_device_address); + } + tensors.emplace_back(std::move(tensor)); + } + return tensors; +} + +std::vector TensorUtils::TensorToMSTensor(std::vector tensors, + const std::vector &tensor_names) { + std::vector ms_tensors; + for (size_t i = 0; i < tensors.size(); i++) { + auto &graph_tensor = tensors[i]; + std::string graph_tensor_name = tensor_names[i]; + graph_tensor.set_name(graph_tensor_name); + auto tensor_impl = std::make_shared(graph_tensor); + ms_tensors.emplace_back(MSTensor(tensor_impl)); + } return ms_tensors; } diff --git a/mindspore/lite/src/extendrt/utils/tensor_utils.h b/mindspore/lite/src/extendrt/utils/tensor_utils.h index 7b4787107a1..e374c176a0c 100644 --- a/mindspore/lite/src/extendrt/utils/tensor_utils.h +++ b/mindspore/lite/src/extendrt/utils/tensor_utils.h @@ -21,11 +21,160 @@ #include #include +#include +#include #include "include/api/types.h" #include "ir/tensor.h" +#include "runtime/device/device_address.h" +#include "common/utils.h" +#include "common/mutable_tensor_impl.h" +#include "mindspore/core/ir/tensor.h" namespace mindspore { +class TensorRefData : public tensor::TensorData { + public: + TensorRefData(void *data, size_t elem_count, size_t data_size, size_t ndim); + ~TensorRefData() = default; + + ssize_t size() const override; + ssize_t itemsize() const override; + ssize_t nbytes() const override; + ssize_t ndim() const override; + void *data() override; + const void *const_data() const override; + bool is_sub_data() const override { return false; } + bool has_sub_data() const override { return false; } + std::string ToString(TypeId type, const ShapeVector &shape, bool use_comma) const override; + + private: + void *data_ = nullptr; + size_t elem_count_ = 0; + size_t data_size_ = 0; + size_t ndim_ = 0; +}; + +constexpr auto kLiteDeviceName = "LiteDevice"; + +class LiteDeviceAddress : public device::DeviceAddress { + public: + LiteDeviceAddress(void *ptr, size_t size) : device::DeviceAddress(ptr, size) { device_name_ = kLiteDeviceName; } + void SetData(void *data) { set_ptr(data); } + + bool SyncDeviceToHost(const ShapeVector &shape, size_t size, TypeId type, void *host_ptr) const override { + return false; + } + bool SyncHostToDevice(const ShapeVector &shape, size_t size, TypeId type, const void *host_ptr, + const std::string &format) const override { + return false; + } + bool SyncHostToDevice(const ShapeVector &shape, size_t size, TypeId type, const void *host_ptr) const override { + return SyncHostToDevice(shape, size, type, host_ptr, "DefaultFormat"); + } + void ClearDeviceMemory() override {} +}; + +class TensorTensorImpl : public MutableTensorImpl { + public: + explicit TensorTensorImpl(const tensor::Tensor &tensor) : tensor_(std::make_shared(tensor)) {} + explicit TensorTensorImpl(const 
std::shared_ptr &tensor) : tensor_(tensor) {} + + void SetData(void *, bool) override { MS_LOG_EXCEPTION << "Cannot set data for TensorTensorImpl"; } + + std::shared_ptr Data() const override { + MS_EXCEPTION_IF_NULL(tensor_); + return std::shared_ptr(tensor_->data_c(), [](const void *) {}); + } + + void *MutableData() override { + MS_EXCEPTION_IF_NULL(tensor_); + return tensor_->data_c(); + } + + void SetDeviceData(void *data) override { + MS_EXCEPTION_IF_NULL(tensor_); + auto data_size = DataSize(); + auto device_address = std::make_shared(data, data_size); + tensor_->set_device_address(device_address); + } + void *GetDeviceData() override { + MS_EXCEPTION_IF_NULL(tensor_); + auto device_address = tensor_->device_address(); + if (device_address == nullptr) { + return nullptr; + } + return device_address->GetMutablePtr(); + } + + bool IsDevice() const override { + MS_EXCEPTION_IF_NULL(tensor_); + return tensor_->device_address() != nullptr; + } + + bool IsConst() const override { return false; } + + void SetShape(const std::vector &shape) override { + MS_EXCEPTION_IF_NULL(tensor_); + tensor_->set_shape(shape); + } + void SetDataType(mindspore::DataType data_type) override { + MS_EXCEPTION_IF_NULL(tensor_); + tensor_->set_data_type(static_cast(data_type)); + } + void SetName(const std::string &name) override { + MS_EXCEPTION_IF_NULL(tensor_); + tensor_->set_name(name); + } + + mindspore::Format Format() const override; + + void SetFormat(mindspore::Format format) override; + + const std::string &Name() const override { + MS_EXCEPTION_IF_NULL(tensor_); + return tensor_->name(); + } + enum DataType DataType() const override { + MS_EXCEPTION_IF_NULL(tensor_); + return static_cast(tensor_->data_type()); + } + const std::vector &Shape() const override { + MS_EXCEPTION_IF_NULL(tensor_); + return tensor_->shape(); + } + + void SetAllocator(const std::shared_ptr &allocator) override { + MS_EXCEPTION_IF_NULL(tensor_); + tensor_->set_user_data("allocator", allocator); + } + std::shared_ptr GetAllocator() const override { + MS_EXCEPTION_IF_NULL(tensor_); + return tensor_->user_data("allocator"); + } + + std::vector GetQuantParams() const override { + MS_EXCEPTION_IF_NULL(tensor_); + auto data = tensor_->user_data>("quant_param"); + return data ? 
*data : std::vector<QuantParam>(); + } + + void SetQuantParams(const std::vector<QuantParam> &quant_param) override { + MS_EXCEPTION_IF_NULL(tensor_); + tensor_->set_user_data("quant_param", std::make_shared<std::vector<QuantParam>>(quant_param)); + } + + int64_t ElementNum() const { + auto &shape = Shape(); + return std::accumulate(shape.begin(), shape.end(), 1, std::multiplies<int64_t>()); + } + size_t DataSize() const override { return ElementNum() * lite::DataTypeSize(static_cast<enum TypeId>(DataType())); } + + std::shared_ptr<Impl> Clone() const override { return std::make_shared<TensorTensorImpl>(tensor_); } + + private: + std::shared_ptr<tensor::Tensor> tensor_ = nullptr; +}; + class TensorUtils { public: // MSTensor <-> TensorPtr @@ -33,6 +182,10 @@ class TensorUtils { static std::vector<MSTensor> TensorPtrToMSTensor(std::vector<tensor::TensorPtr> tensor_ptrs, const std::vector<std::string> &tensor_names); + static std::vector<mindspore::tensor::Tensor> MSTensorToTensor(const std::vector<MSTensor> &ms_tensors); + static std::vector<MSTensor> TensorToMSTensor(std::vector<mindspore::tensor::Tensor> tensors, + const std::vector<std::string> &tensor_names); + // TensorPtr <-> Tensor static std::vector<tensor::TensorPtr> TensorToTensorPtr( const std::vector<mindspore::tensor::Tensor> &tensors); diff --git a/mindspore/lite/src/litert/c_api/tensor_c.cc b/mindspore/lite/src/litert/c_api/tensor_c.cc index 82be452a5c1..49778cb5de0 100644 --- a/mindspore/lite/src/litert/c_api/tensor_c.cc +++ b/mindspore/lite/src/litert/c_api/tensor_c.cc @@ -146,7 +146,7 @@ MSFormat MSTensorGetFormat(const MSTensorHandle tensor) { return kMSFormatNHWC; } auto impl = static_cast<mindspore::LiteTensorImpl *>(tensor); - return static_cast<MSFormat>(impl->format()); + return static_cast<MSFormat>(impl->Format()); } void MSTensorSetData(MSTensorHandle tensor, void *data) { @@ -155,7 +155,7 @@ void MSTensorSetData(MSTensorHandle tensor, void *data) { return; } auto impl = static_cast<mindspore::LiteTensorImpl *>(tensor); - return impl->SetData(data); + return impl->SetData(data, true); } const void *MSTensorGetData(const MSTensorHandle tensor) { diff --git a/mindspore/lite/src/litert/cxx_api/tensor/tensor_impl.cc b/mindspore/lite/src/litert/cxx_api/tensor/tensor_impl.cc index d908931953b..6f1eec63c76 100644 --- a/mindspore/lite/src/litert/cxx_api/tensor/tensor_impl.cc +++ b/mindspore/lite/src/litert/cxx_api/tensor/tensor_impl.cc @@ -77,6 +77,12 @@ std::shared_ptr<LiteTensorImpl> LiteTensorImpl::CreateTensorImplByDeepCopy(const return impl; } +void LiteTensorImpl::SetDeviceData(void *data) { MS_LOG(ERROR) << "Not implemented."; } +void *LiteTensorImpl::GetDeviceData() { + MS_LOG(ERROR) << "Not implemented."; + return nullptr; +} + #ifndef STRING_KERNEL_CLIP std::shared_ptr<LiteTensorImpl> LiteTensorImpl::StringsToTensorImpl(const std::string &name, const std::vector<std::string> &str) { diff --git a/mindspore/lite/src/litert/cxx_api/tensor/tensor_impl.h b/mindspore/lite/src/litert/cxx_api/tensor/tensor_impl.h index cd0da314579..451ee99559a 100644 --- a/mindspore/lite/src/litert/cxx_api/tensor/tensor_impl.h +++ b/mindspore/lite/src/litert/cxx_api/tensor/tensor_impl.h @@ -30,11 +30,12 @@ #include "src/tensor.h" #include "src/common/log_adapter.h" #include "ir/api_tensor_impl.h" +#include "common/mutable_tensor_impl.h" namespace mindspore { using mindspore::lite::RET_OK; -class LiteTensorImpl : public mindspore::MSTensor::Impl { +class LiteTensorImpl : public MutableTensorImpl { public: LiteTensorImpl() {} @@ -80,7 +81,7 @@ class LiteTensorImpl : public mindspore::MSTensor::Impl { return tensor_name_; } - void SetName(const std::string &name) { + void SetName(const std::string &name) override { if (lite_tensor_ == nullptr) { MS_LOG(ERROR) << "Invalid tensor."; return; } @@ -97,7 +98,7 @@ class LiteTensorImpl : public mindspore::MSTensor::Impl { return static_cast<enum DataType>(lite_tensor_->data_type()); } - void SetDataType(enum DataType 
data_type) { + void SetDataType(enum DataType data_type) override { if (lite_tensor_ == nullptr) { MS_LOG(ERROR) << "Invalid tensor."; return; @@ -127,7 +128,7 @@ class LiteTensorImpl : public mindspore::MSTensor::Impl { std::shared_ptr Clone() const override { return nullptr; } - void SetShape(const std::vector &shape) { + void SetShape(const std::vector &shape) override { if (lite_tensor_ == nullptr) { MS_LOG(ERROR) << "Invalid tensor."; return; @@ -138,7 +139,7 @@ class LiteTensorImpl : public mindspore::MSTensor::Impl { lite_tensor_->set_shape(tensor_shape); } - std::shared_ptr allocator() const { + std::shared_ptr GetAllocator() const override { if (lite_tensor_ == nullptr) { MS_LOG(ERROR) << "Invalid tensor."; return nullptr; @@ -146,7 +147,7 @@ class LiteTensorImpl : public mindspore::MSTensor::Impl { return lite_tensor_->allocator(); } - void SetAllocator(std::shared_ptr allocator) { + void SetAllocator(const std::shared_ptr &allocator) override { if (lite_tensor_ == nullptr) { MS_LOG(ERROR) << "Invalid tensor."; return; @@ -154,7 +155,7 @@ class LiteTensorImpl : public mindspore::MSTensor::Impl { lite_tensor_->set_allocator(allocator); } - mindspore::Format format() { + mindspore::Format Format() const override { if (lite_tensor_ == nullptr) { MS_LOG(ERROR) << "Invalid tensor."; return mindspore::Format::NHWC; @@ -162,7 +163,7 @@ class LiteTensorImpl : public mindspore::MSTensor::Impl { return lite_tensor_->format(); } - void SetFormat(mindspore::Format format) { + void SetFormat(mindspore::Format format) override { if (lite_tensor_ == nullptr) { MS_LOG(ERROR) << "Invalid tensor."; return; @@ -185,7 +186,7 @@ class LiteTensorImpl : public mindspore::MSTensor::Impl { } return lite_tensor_->MutableData(); } - virtual bool IsConst() const { + bool IsConst() const override { if (lite_tensor_ == nullptr) { MS_LOG(ERROR) << "Invalid tensor."; return false; @@ -201,15 +202,15 @@ class LiteTensorImpl : public mindspore::MSTensor::Impl { return lite_tensor_->Size(); } - void SetData(void *data) { + void SetData(void *data, bool own_data) override { if (lite_tensor_ == nullptr) { MS_LOG(ERROR) << "Invalid tensor."; return; } - lite_tensor_->set_data(data); + lite_tensor_->set_data(data, own_data); } - virtual std::vector QuantParams() const { + std::vector GetQuantParams() const override { if (lite_tensor_ == nullptr) { MS_LOG(ERROR) << "Invalid tensor."; return std::vector{}; @@ -228,7 +229,7 @@ class LiteTensorImpl : public mindspore::MSTensor::Impl { return quant_params; } - void SetQuantParams(std::vector quant_params) { + void SetQuantParams(const std::vector &quant_params) override { if (lite_tensor_ == nullptr) { MS_LOG(ERROR) << "Invalid tensor."; return; @@ -261,6 +262,9 @@ class LiteTensorImpl : public mindspore::MSTensor::Impl { void set_from_session(bool from_session) { from_session_ = from_session; } + void SetDeviceData(void *data) override; + void *GetDeviceData() override; + private: lite::Tensor *lite_tensor_ = nullptr; std::string tensor_name_ = ""; diff --git a/mindspore/lite/src/litert/cxx_api/types.cc b/mindspore/lite/src/litert/cxx_api/types.cc index b194d9fbf90..e6b1a2a79ea 100644 --- a/mindspore/lite/src/litert/cxx_api/types.cc +++ b/mindspore/lite/src/litert/cxx_api/types.cc @@ -100,6 +100,8 @@ bool MSTensor::operator==(const MSTensor &tensor) const { return lite_impl->lite_tensor() == lite_tensor_impl->lite_tensor(); } +bool MSTensor::operator!=(const MSTensor &tensor) const { return !operator==(tensor); } + MSTensor *MSTensor::CreateTensor(const std::vector &name, 
enum DataType type, const std::vector &shape, const void *data, size_t data_len) noexcept { if (data_len > MAX_MALLOC_SIZE) { @@ -146,12 +148,14 @@ MSTensor *MSTensor::CreateTensor(const std::vector &name, enum DataType ty } MSTensor *MSTensor::CreateRefTensor(const std::vector &name, enum DataType type, - const std::vector &shape, const void *data, size_t data_len) noexcept { + const std::vector &shape, const void *data, size_t data_len, + bool own_data) noexcept { auto impl = LiteTensorImpl::CreateTensorImpl(CharToString(name), type, shape, data, data_len); if (impl == nullptr) { MS_LOG(ERROR) << "Allocate tensor impl failed."; return nullptr; } + impl->set_own_data(own_data); auto ms_tensor = new (std::nothrow) MSTensor(impl); if (ms_tensor == nullptr) { MS_LOG(ERROR) << "Allocate tensor impl failed."; @@ -160,10 +164,10 @@ MSTensor *MSTensor::CreateRefTensor(const std::vector &name, enum DataType return ms_tensor; } -MSTensor *MSTensor::CreateDevTensor(const std::vector &name, enum DataType type, - const std::vector &shape, const void *data, size_t data_len) noexcept { +MSTensor MSTensor::CreateDeviceTensor(const std::vector &name, enum DataType type, + const std::vector &shape, void *data, size_t data_len) noexcept { MS_LOG(ERROR) << "Unsupported Feature."; - return nullptr; + return MSTensor(nullptr); } MSTensor *MSTensor::CreateTensorFromFile(const std::vector &file, enum DataType type, @@ -305,12 +309,28 @@ void *MSTensor::MutableData() { return impl_->MutableData(); } +void MSTensor::SetDeviceData(void *data) { + if (impl_ == nullptr) { + MS_LOG(ERROR) << "Invalid tensor implement."; + return; + } + std::static_pointer_cast(impl_)->SetDeviceData(data); +} + +void *MSTensor::GetDeviceData() { + if (impl_ == nullptr) { + MS_LOG(ERROR) << "Invalid tensor implement."; + return nullptr; + } + return std::static_pointer_cast(impl_)->GetDeviceData(); +} + bool MSTensor::IsConst() const { if (impl_ == nullptr) { MS_LOG(ERROR) << "Invalid tensor implement."; return false; } - return std::static_pointer_cast(impl_)->IsConst(); + return std::static_pointer_cast(impl_)->IsConst(); } size_t MSTensor::DataSize() const { @@ -338,7 +358,7 @@ void MSTensor::SetShape(const std::vector &shape) { return; } - std::static_pointer_cast(impl_)->SetShape(shape); + std::static_pointer_cast(impl_)->SetShape(shape); } void MSTensor::SetDataType(enum DataType data_type) { @@ -347,7 +367,7 @@ void MSTensor::SetDataType(enum DataType data_type) { return; } - std::static_pointer_cast(impl_)->SetDataType(data_type); + std::static_pointer_cast(impl_)->SetDataType(data_type); } void MSTensor::SetTensorName(const std::vector &name) { @@ -355,7 +375,7 @@ void MSTensor::SetTensorName(const std::vector &name) { MS_LOG(ERROR) << "Invalid tensor implement."; return; } - std::static_pointer_cast(impl_)->SetName(CharToString(name)); + std::static_pointer_cast(impl_)->SetName(CharToString(name)); } void MSTensor::SetAllocator(std::shared_ptr allocator) { @@ -364,7 +384,7 @@ void MSTensor::SetAllocator(std::shared_ptr allocator) { return; } - return std::static_pointer_cast(impl_)->SetAllocator(allocator); + return std::static_pointer_cast(impl_)->SetAllocator(allocator); } std::shared_ptr MSTensor::allocator() const { @@ -373,7 +393,7 @@ std::shared_ptr MSTensor::allocator() const { return nullptr; } - return std::static_pointer_cast(impl_)->allocator(); + return std::static_pointer_cast(impl_)->GetAllocator(); } void MSTensor::SetFormat(mindspore::Format format) { @@ -382,7 +402,7 @@ void 
MSTensor::SetFormat(mindspore::Format format) { return; } - return std::static_pointer_cast<LiteTensorImpl>(impl_)->SetFormat(format); + return std::static_pointer_cast<MutableTensorImpl>(impl_)->SetFormat(format); } mindspore::Format MSTensor::format() const { @@ -391,16 +411,16 @@ mindspore::Format MSTensor::format() const { return mindspore::Format::NHWC; } - return std::static_pointer_cast<LiteTensorImpl>(impl_)->format(); + return std::static_pointer_cast<MutableTensorImpl>(impl_)->Format(); } -void MSTensor::SetData(void *data) { +void MSTensor::SetData(void *data, bool own_data) { if (impl_ == nullptr) { MS_LOG(ERROR) << "Invalid tensor implement."; return; } - return std::static_pointer_cast<LiteTensorImpl>(impl_)->SetData(data); + return std::static_pointer_cast<MutableTensorImpl>(impl_)->SetData(data, own_data); } std::vector<QuantParam> MSTensor::QuantParams() const { @@ -409,7 +429,7 @@ std::vector<QuantParam> MSTensor::QuantParams() const { return std::vector<QuantParam>{}; } - return std::static_pointer_cast<LiteTensorImpl>(impl_)->QuantParams(); + return std::static_pointer_cast<MutableTensorImpl>(impl_)->GetQuantParams(); } void MSTensor::SetQuantParams(std::vector<QuantParam> quant_params) { @@ -418,7 +438,7 @@ void MSTensor::SetQuantParams(std::vector<QuantParam> quant_params) { return; } - return std::static_pointer_cast<LiteTensorImpl>(impl_)->SetQuantParams(quant_params); + return std::static_pointer_cast<MutableTensorImpl>(impl_)->SetQuantParams(quant_params); } Buffer::Buffer() : impl_(std::make_shared<Impl>()) {} diff --git a/mindspore/lite/src/tensor.h b/mindspore/lite/src/tensor.h index 7a15b10af87..902f676ef3b 100644 --- a/mindspore/lite/src/tensor.h +++ b/mindspore/lite/src/tensor.h @@ -128,16 +128,13 @@ class Tensor { // note: in the case that old_data is valid, set_data just releases the ownership of it but does not free it. Of // course, you can call FreeData before calling set_data to ensure the data can be freed by the current tensor. - void set_data(void *data) { - if (this->data_ == data) { - return; - } - if (allocator_ != nullptr) { + void set_data(void *data, bool own_data = true) { + if (allocator_ != nullptr && this->data_ != data) { allocator_->IncRefCount(data, 1); allocator_->DecRefCount(this->data_, 1); } this->data_ = data; - this->own_data_ = true; + this->own_data_ = own_data; } Category category() const { return this->category_; } diff --git a/mindspore/lite/tools/benchmark/benchmark_unified_api.cc b/mindspore/lite/tools/benchmark/benchmark_unified_api.cc index 94d52e26a7f..1cfe1ebc86d 100644 --- a/mindspore/lite/tools/benchmark/benchmark_unified_api.cc +++ b/mindspore/lite/tools/benchmark/benchmark_unified_api.cc @@ -910,12 +910,12 @@ int BenchmarkUnifiedApi::PrintInputData() { #ifdef PARALLEL_INFERENCE void BenchmarkUnifiedApi::ModelParallelRunnerWarmUp(int index) { auto in = model_runner_.GetInputs(); - auto output = all_outputs_[index]; for (size_t i = 0; i < in.size(); i++) { in[i].SetData(all_inputs_data_[index][i]); in[i].SetShape(resize_dims_[i]); } auto warm_up_start = GetTimeUs(); + std::vector<MSTensor> output; auto ret = model_runner_.Predict(in, &output); for (size_t j = 0; j < in.size(); j++) { in[j].SetData(nullptr); } @@ -937,12 +937,12 @@ void BenchmarkUnifiedApi::ModelParallelRunnerRun(int task_num, int parallel_idx) int idx = parallel_idx + flags_->warm_up_loop_count_; auto in = model_runner_.GetInputs(); auto in_data = all_inputs_data_[idx]; - auto output = all_outputs_[idx]; for (size_t tensor_index = 0; tensor_index < in.size(); tensor_index++) { in.at(tensor_index).SetData(all_inputs_data_.at(idx)[tensor_index]); in.at(tensor_index).SetShape(resize_dims_.at(tensor_index)); } auto predict_start = GetTimeUs(); + std::vector<MSTensor> output; auto ret = model_runner_.Predict(in, 
&output); if (ret != kSuccess) { model_parallel_runner_ret_failed_ = true; diff --git a/mindspore/lite/tools/converter/registry/CMakeLists.txt b/mindspore/lite/tools/converter/registry/CMakeLists.txt index 12296e857c0..88e610ee832 100644 --- a/mindspore/lite/tools/converter/registry/CMakeLists.txt +++ b/mindspore/lite/tools/converter/registry/CMakeLists.txt @@ -13,6 +13,7 @@ set(REG_SRC ${CONVERT_REG_SRC} ${KERNEL_REG_DIR}/../common/string_util.cc ${KERNEL_REG_DIR}/../common/utils.cc ${KERNEL_REG_DIR}/../extendrt/delegate/tensorrt/distribution/distribution_base.cc + ${KERNEL_REG_DIR}/../extendrt/delegate/plugin/tensorrt_executor_plugin.cc ${CORE_DIR}/utils/log_adapter.cc ${CORE_DIR}/utils/status.cc ${CONVERTER_DIR}/converter_context.cc diff --git a/tests/st/cpp/model/test_zero_copy.cc b/tests/st/cpp/model/test_zero_copy.cc index 6982f569027..1c0b091c2cd 100644 --- a/tests/st/cpp/model/test_zero_copy.cc +++ b/tests/st/cpp/model/test_zero_copy.cc @@ -137,9 +137,9 @@ TEST_F(TestZeroCopy, TestDeviceTensor) { // Apply transform on images Status rc = Transform(image, &image); ASSERT_TRUE(rc == kSuccess); - MSTensor *device_tensor = - MSTensor::CreateDevTensor(image.Name(), image.DataType(), image.Shape(), - image.MutableData(), image.DataSize()); + MSTensor device_tensor = + MSTensor::CreateDeviceTensor(image.Name(), image.DataType(), image.Shape(), + image.MutableData(), image.DataSize()); MSTensor *tensor = MSTensor::CreateTensor(image.Name(), image.DataType(), image.Shape(), image.Data().get(), image.DataSize()); @@ -158,7 +158,7 @@ TEST_F(TestZeroCopy, TestDeviceTensor) { inputs.clear(); start_time = (TimeValue){0}; end_time = (TimeValue){0}; - inputs.push_back(*device_tensor); + inputs.push_back(device_tensor); // infer with device tensor (void)gettimeofday(&start_time, nullptr);
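
Note on the PreExecute/PostExecute split in TensorRTSubGraph: the refactored Execute() is built around the allocator's mem-valid flag, which acts as a one-shot cache so an input already resident on the device is not re-uploaded within the same run. The sketch below only restates the protocol from the hunks above in one place; alloc abbreviates runtime_->GetAllocator(), and in_name/out_name stand for entries of trt_in_tensor_name_/trt_out_tensor_name_.

  // PreExecute: upload only inputs whose device copy is stale.
  if (!alloc->GetMemIsValid(in_name)) {
    alloc->SyncMemHostToDevice(input, in_name);
    alloc->MarkMemValid(in_name, true);
  }
  // Execute: trt_context_->executeV2(tensor_bindings_);
  // PostExecute: read results back, then invalidate for the next run.
  alloc->MarkMemValid(out_name, true);
  alloc->SyncMemDeviceToHost(&output_tensor, out_name);
  alloc->MarkMemValid(out_name, false);
  alloc->MarkMemValid(in_name, false);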
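The sessions now hand out MutableTensorImplPtr instead of tensor::TensorPtr, so callers can adjust tensor metadata in place without copying the graph tensors. A minimal sketch of reading an output through the new accessor; session stands for any InferSession, and the tensor name is illustrative:

  auto out = session->GetOutputByTensorName("output_0");
  if (out != nullptr) {
    auto shape = out->Shape();
    void *host_data = out->MutableData();  // host copy is valid after RunGraph
  }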
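TensorDefaultImpl (the new header above) allocates its host buffer lazily: Data() and MutableData() both go through ResizeData(), which grows the internal Buffer only when the tensor still owns it and the byte size implied by shape and data type has changed. A sketch of the resulting behavior, with an illustrative name and shape:

  mindspore::TensorDefaultImpl impl("y", mindspore::DataType::kNumberTypeFloat32, {2, 2});
  impl.SetShape({4, 4});         // metadata only, no allocation yet
  void *p = impl.MutableData();  // ResizeData() grows the buffer to 4 * 4 * sizeof(float) here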
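TensorRefData exists so a tensor::Tensor can alias caller-owned memory instead of copying it, which is what TensorUtils::MSTensorToTensor and MSTensorToTensorPtr rely on above. A minimal sketch, assuming a host buffer of 16 floats (all names illustrative):

  std::vector<float> host(16);
  ShapeVector shape = {4, 4};
  auto ref = std::make_shared<mindspore::TensorRefData>(
      host.data(), /*elem_count=*/host.size(), /*data_size=*/host.size() * sizeof(float), shape.size());
  mindspore::tensor::Tensor t(kNumberTypeFloat32, shape, ref);  // aliases host.data(); no copy, no ownership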
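The own_data flag threaded through CreateRefTensor, MSTensor::SetData and lite::Tensor::set_data decides whether the tensor frees the pointer on destruction. A sketch of the zero-copy input case this patch targets; the buffer, shape and data type are illustrative, not from the patch:

  std::vector<float> input_buf(1 * 3 * 224 * 224);
  auto *in = mindspore::MSTensor::CreateRefTensor(
      "data", mindspore::DataType::kNumberTypeFloat32, {1, 3, 224, 224},
      input_buf.data(), input_buf.size() * sizeof(float), /*own_data=*/false);
  // ... Predict ...
  mindspore::MSTensor::DestroyTensorPtr(in);  // frees only the wrapper; input_buf is untouched

With own_data=true (the default, preserving the old behavior) the tensor would free the buffer itself.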
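CreateDeviceTensor (exercised by the updated test_zero_copy.cc above) returns an MSTensor by value rather than a heap pointer, so no DestroyTensorPtr call is needed. A sketch under the assumption that dev_ptr is valid device memory managed by the caller; note the litert build still stubs this out with "Unsupported Feature":

  void *dev_ptr = /* caller-managed device allocation */ nullptr;
  auto device_tensor = mindspore::MSTensor::CreateDeviceTensor(
      "x", mindspore::DataType::kNumberTypeFloat32, {1, 128}, dev_ptr, 128 * sizeof(float));
  void *bound = device_tensor.GetDeviceData();  // the same device pointer, via SetDeviceData/GetDeviceData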