forked from mindspore-Ecosystem/mindspore
Cloud inference tensor opt
This commit is contained in:
parent: aaeae5d3ae
commit: f33ea707cd
@@ -81,10 +81,11 @@ class MS_API MSTensor {
  /// \param[in] shape The shape of the MSTensor.
  /// \param[in] data The data pointer that points to allocated memory.
  /// \param[in] data_len The length of the memory, in bytes.
  /// \param[in] own_data Whether the data memory should be freed in MSTensor destruction.
  ///
  /// \return A pointer of MSTensor.
  static inline MSTensor *CreateRefTensor(const std::string &name, DataType type, const std::vector<int64_t> &shape,
                                          const void *data, size_t data_len) noexcept;
                                          const void *data, size_t data_len, bool own_data = true) noexcept;

  /// \brief Creates a MSTensor object, whose device data can be directly accessed by Model, must be used in pairs with
  /// DestroyTensorPtr.

@@ -96,8 +97,8 @@ class MS_API MSTensor {
  /// \param[in] data_len The length of the memory, in bytes.
  ///
  /// \return A pointer of MSTensor.
  static inline MSTensor *CreateDevTensor(const std::string &name, DataType type, const std::vector<int64_t> &shape,
                                          const void *data, size_t data_len) noexcept;
  static inline MSTensor CreateDeviceTensor(const std::string &name, DataType type, const std::vector<int64_t> &shape,
                                            void *data, size_t data_len) noexcept;

  /// \brief Creates a MSTensor object from local file, must be used in pairs with DestroyTensorPtr.
  ///

@@ -125,7 +126,7 @@ class MS_API MSTensor {
  /// \return A vector container containing several strings.
  static inline std::vector<std::string> TensorToStrings(const MSTensor &tensor);

  /// \brief Destroy an object created by Clone, StringsToTensor, CreateRefTensor, CreateDevTensor or CreateTensor. Do
  /// \brief Destroy an object created by Clone, StringsToTensor, CreateRefTensor or CreateTensor. Do
  /// not use it to destroy MSTensor from other sources.
  ///
  /// \param[in] tensor A MSTensor object.

@@ -207,6 +208,13 @@ class MS_API MSTensor {
  /// \return The boolean value that indicates whether the MSTensor equals tensor.
  bool operator==(const MSTensor &tensor) const;

  /// \brief Get the boolean value that indicates whether the MSTensor not equals tensor.
  ///
  /// \param[in] another MSTensor.
  ///
  /// \return The boolean value that indicates whether the MSTensor not equals tensor.
  bool operator!=(const MSTensor &tensor) const;

  /// \brief Set the shape of for the MSTensor. Only valid for Lite.
  ///
  /// \param[in] shape Shape of the MSTensor, a vector of int64_t.

@@ -251,7 +259,20 @@ class MS_API MSTensor {
  /// \note The memory pointed to origin data pointer of MSTensor needs to be managed by the user
  ///
  /// \param[in] data A pointer to the data of the MSTensor.
  void SetData(void *data);
  /// \param[in] own_data Whether the data memory should be freed in MSTensor destruction.
  void SetData(void *data, bool own_data = true);

  /// \brief Set the device data address for the MSTensor. Only valid for Lite.
  ///
  /// \note The memory pointed to origin data pointer of MSTensor needs to be managed by the user
  ///
  /// \param[in] data A pointer to the device data of the MSTensor.
  void SetDeviceData(void *data);

  /// \brief Get the device data address of the MSTensor set by SetDeviceData. Only valid for Lite.
  ///
  /// \return A pointer to the device data of the MSTensor.
  void *GetDeviceData();

  /// \brief Get the quantization parameters of the MSTensor. Only valid for Lite.
  ///
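The own_data flag and the new device-data accessors above change who is responsible for freeing the memory behind an MSTensor. A minimal usage sketch, assuming the public Lite C++ API and an application-managed device pointer (names below are illustrative, not part of this commit):

```cpp
// Sketch only: not part of this commit. Assumes the MindSpore Lite C++ API from include/api/types.h.
#include <vector>
#include "include/api/types.h"

void RefTensorUsageSketch(void *device_ptr /* pre-allocated device memory, hypothetical */) {
  std::vector<float> host_buf(2 * 3, 0.0f);  // caller-owned host memory
  auto *ref = mindspore::MSTensor::CreateRefTensor(
      "x", mindspore::DataType::kNumberTypeFloat32, {2, 3},
      host_buf.data(), host_buf.size() * sizeof(float), /* own_data = */ false);
  if (ref == nullptr) {
    return;
  }
  ref->SetDeviceData(device_ptr);          // attach device memory without copying
  void *same_ptr = ref->GetDeviceData();   // reads back the pointer set above
  (void)same_ptr;
  mindspore::MSTensor::DestroyTensorPtr(ref);  // destroys the wrapper; host_buf stays valid
}
```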
@@ -270,9 +291,9 @@ class MS_API MSTensor {
  static MSTensor *CreateTensor(const std::vector<char> &name, enum DataType type, const std::vector<int64_t> &shape,
                                const void *data, size_t data_len) noexcept;
  static MSTensor *CreateRefTensor(const std::vector<char> &name, enum DataType type, const std::vector<int64_t> &shape,
                                   const void *data, size_t data_len) noexcept;
  static MSTensor *CreateDevTensor(const std::vector<char> &name, enum DataType type, const std::vector<int64_t> &shape,
                                   const void *data, size_t data_len) noexcept;
                                   const void *data, size_t data_len, bool own_data) noexcept;
  static MSTensor CreateDeviceTensor(const std::vector<char> &name, enum DataType type,
                                     const std::vector<int64_t> &shape, void *data, size_t data_len) noexcept;
  static MSTensor *CreateTensorFromFile(const std::vector<char> &file, enum DataType type,
                                        const std::vector<int64_t> &shape) noexcept;
  static MSTensor *CharStringsToTensor(const std::vector<char> &name, const std::vector<std::vector<char>> &str);

@@ -313,13 +334,13 @@ MSTensor *MSTensor::CreateTensor(const std::string &name, enum DataType type, co
}

MSTensor *MSTensor::CreateRefTensor(const std::string &name, enum DataType type, const std::vector<int64_t> &shape,
                                    const void *data, size_t data_len) noexcept {
  return CreateRefTensor(StringToChar(name), type, shape, data, data_len);
                                    const void *data, size_t data_len, bool own_data) noexcept {
  return CreateRefTensor(StringToChar(name), type, shape, data, data_len, own_data);
}

MSTensor *MSTensor::CreateDevTensor(const std::string &name, enum DataType type, const std::vector<int64_t> &shape,
                                    const void *data, size_t data_len) noexcept {
  return CreateDevTensor(StringToChar(name), type, shape, data, data_len);
MSTensor MSTensor::CreateDeviceTensor(const std::string &name, enum DataType type, const std::vector<int64_t> &shape,
                                      void *data, size_t data_len) noexcept {
  return CreateDeviceTensor(StringToChar(name), type, shape, data, data_len);
}

MSTensor *MSTensor::CreateTensorFromFile(const std::string &file, enum DataType type,
@@ -155,7 +155,8 @@ MSTensor *MSTensor::CreateTensor(const std::vector<char> &name, enum DataType ty
}

MSTensor *MSTensor::CreateRefTensor(const std::vector<char> &name, enum DataType type,
                                    const std::vector<int64_t> &shape, const void *data, size_t data_len) noexcept {
                                    const std::vector<int64_t> &shape, const void *data, size_t data_len,
                                    bool) noexcept {
  std::string name_str = CharToString(name);
  try {
    std::shared_ptr<Impl> impl = std::make_shared<TensorReferenceImpl>(name_str, type, shape, data, data_len, false);

@@ -170,19 +171,18 @@ MSTensor *MSTensor::CreateRefTensor(const std::vector<char> &name, enum DataType
  }
}

MSTensor *MSTensor::CreateDevTensor(const std::vector<char> &name, enum DataType type,
                                    const std::vector<int64_t> &shape, const void *data, size_t data_len) noexcept {
MSTensor MSTensor::CreateDeviceTensor(const std::vector<char> &name, enum DataType type,
                                      const std::vector<int64_t> &shape, void *data, size_t data_len) noexcept {
  std::string name_str = CharToString(name);
  try {
    std::shared_ptr<Impl> impl = std::make_shared<TensorReferenceImpl>(name_str, type, shape, data, data_len, true);
    MSTensor *ret = new MSTensor(impl);
    return ret;
    return MSTensor(impl);
  } catch (const std::bad_alloc &) {
    MS_LOG(ERROR) << "Malloc memory failed.";
    return nullptr;
    return MSTensor(nullptr);
  } catch (...) {
    MS_LOG(ERROR) << "Unknown error occurred.";
    return nullptr;
    return MSTensor(nullptr);
  }
}

@@ -382,6 +382,10 @@ bool MSTensor::operator==(std::nullptr_t) const { return impl_ == nullptr; }

bool MSTensor::operator!=(std::nullptr_t) const { return impl_ != nullptr; }

bool MSTensor::operator==(const MSTensor &tensor) const { return impl_ == tensor.impl_; }

bool MSTensor::operator!=(const MSTensor &tensor) const { return impl_ != tensor.impl_; }

MSTensor *MSTensor::Clone() const {
  MS_EXCEPTION_IF_NULL(impl_);
  try {

@@ -456,7 +460,11 @@ void MSTensor::SetFormat(mindspore::Format) { MS_LOG_EXCEPTION << "Invalid imple

mindspore::Format MSTensor::format() const { MS_LOG_EXCEPTION << "Invalid implement."; }

void MSTensor::SetData(void *) { MS_LOG_EXCEPTION << "Invalid implement."; }
void MSTensor::SetData(void *, bool) { MS_LOG_EXCEPTION << "Invalid implement."; }

void MSTensor::SetDeviceData(void *) { MS_LOG_EXCEPTION << "Invalid implement."; }

void *MSTensor::GetDeviceData() { MS_LOG_EXCEPTION << "Invalid implement."; }

std::vector<QuantParam> MSTensor::QuantParams() const { MS_LOG_EXCEPTION << "Invalid implement."; }
@@ -628,7 +628,8 @@ Tensor::Tensor(const Tensor &tensor)
      device_event_(tensor.device_event_),
      lazy_callback_(tensor.lazy_callback_),
      user_data_(tensor.user_data_),
      compression_type_(tensor.compression_type_) {}
      compression_type_(tensor.compression_type_),
      tensor_name_(tensor.tensor_name_) {}

Tensor::Tensor(const Tensor &tensor, TypeId data_type)
    : MetaTensor(data_type, tensor.shape_),

@@ -649,7 +650,8 @@ Tensor::Tensor(const Tensor &tensor, TypeId data_type)
      device_event_(tensor.device_event_),
      lazy_callback_(tensor.lazy_callback_),
      user_data_(tensor.user_data_),
      compression_type_(tensor.compression_type_) {}
      compression_type_(tensor.compression_type_),
      tensor_name_(tensor.tensor_name_) {}

Tensor::Tensor(TypeId data_type, const ShapeVector &shape, TensorDataPtr data)
    : MetaTensor(data_type, shape), data_(std::move(data)), id_(MakeId()) {}

@@ -669,6 +669,16 @@ class MS_CORE_API Tensor final : public MetaTensor {
  /// \return tensor compression type.
  TensorCompressionType compression_type() const { return compression_type_; }

  /// \brief Set tensor name.
  ///
  /// \param[in] tensor_name The tensor name.
  void set_name(const std::string &tensor_name) { tensor_name_ = tensor_name; }

  /// \brief Get the tensor name.
  ///
  /// \return tensor name.
  const std::string &name() const { return tensor_name_; }

 private:
  void ExecuteLazyTask() const;

@@ -695,6 +705,8 @@ class MS_CORE_API Tensor final : public MetaTensor {
  std::function<void(void)> lazy_callback_{nullptr};
  UserData user_data_;
  TensorCompressionType compression_type_{kNoCompression};

  std::string tensor_name_;
};

// CSRTensor entity class
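The added tensor_name_ field simply carries a graph tensor's name through copies of Tensor. A small illustrative sketch of the accessor pair, assuming a tensor::TensorPtr created from this header (not part of the commit):

```cpp
// Illustrative only; assumes mindspore/core/ir/tensor.h as changed above.
#include "ir/tensor.h"

void NameRoundTrip(const mindspore::tensor::TensorPtr &t) {
  t->set_name("graph_input_0");       // store the graph tensor name on the runtime tensor
  const std::string &n = t->name();   // retrieved later, e.g. when wrapping it as an MSTensor
  (void)n;
}
```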
@@ -67,6 +67,10 @@ if(MACHINE_LINUX_ARM64)
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=armv8-a+fp16")
endif()

if(DEFINED ENV{MSLITE_ENABLE_CLOUD_FUSION_INFERENCE})
    set(MSLITE_ENABLE_CLOUD_FUSION_INFERENCE $ENV{MSLITE_ENABLE_CLOUD_FUSION_INFERENCE})
endif()

if(DEFINED ENV{MSLITE_ENABLE_EXPERIMENTAL_KERNEL})
    set(MSLITE_ENABLE_EXPERIMENTAL_KERNEL $ENV{MSLITE_ENABLE_EXPERIMENTAL_KERNEL})
endif()

@@ -90,6 +94,10 @@ if(DEFINED ENV{MSLITE_ENABLE_TRAIN})
    set(MSLITE_ENABLE_TRAIN $ENV{MSLITE_ENABLE_TRAIN})
endif()

if(MSLITE_ENABLE_CLOUD_FUSION_INFERENCE)
    set(MSLITE_ENABLE_TRAIN OFF)
endif()

if(DEFINED ENV{MSLITE_ENABLE_SERVER_INFERENCE})
    set(MSLITE_ENABLE_SERVER_INFERENCE $ENV{MSLITE_ENABLE_SERVER_INFERENCE})
endif()

@@ -187,6 +195,10 @@ if(DEFINED ENV{MSLITE_ENABLE_MODEL_ENCRYPTION})
    endif()
endif()

if(MSLITE_ENABLE_CLOUD_FUSION_INFERENCE)
    set(MSLITE_ENABLE_MODEL_ENCRYPTION ON)
endif()

if(DEFINED ENV{MSLITE_ENABLE_COVERAGE})
    set(MSLITE_ENABLE_COVERAGE $ENV{MSLITE_ENABLE_COVERAGE})
endif()

@@ -211,10 +223,6 @@ if(MSLITE_ENABLE_GITEE_MIRROR)
    set(ENABLE_GITEE ON)
endif()

if(DEFINED ENV{MSLITE_ENABLE_CLOUD_FUSION_INFERENCE})
    set(MSLITE_ENABLE_CLOUD_FUSION_INFERENCE $ENV{MSLITE_ENABLE_CLOUD_FUSION_INFERENCE})
endif()

if(DEFINED ENV{ENABLE_FAST_HASH_TABLE})
    add_compile_definitions(ENABLE_FAST_HASH_TABLE)
    include_directories(${TOP_DIR}/third_party/robin_hood/include)
@@ -0,0 +1,44 @@
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef MINDSPORE_LITE_SRC_COMMON_MUTABLE_TESNOR_IMPL_H_
#define MINDSPORE_LITE_SRC_COMMON_MUTABLE_TESNOR_IMPL_H_

#include <string>
#include <memory>
#include <vector>
#include "ir/api_tensor_impl.h"

namespace mindspore {
class MutableTensorImpl : public MSTensor::Impl {
 public:
  virtual void SetName(const std::string &name) = 0;
  virtual void SetDataType(mindspore::DataType data_type) = 0;
  virtual void SetShape(const std::vector<int64_t> &shape) = 0;
  virtual mindspore::Format Format() const = 0;
  virtual void SetFormat(mindspore::Format format) = 0;
  virtual void SetData(void *data, bool own_data) = 0;
  virtual bool IsConst() const = 0;
  virtual void SetAllocator(const std::shared_ptr<Allocator> &allocator) = 0;
  virtual std::shared_ptr<Allocator> GetAllocator() const = 0;
  virtual std::vector<QuantParam> GetQuantParams() const = 0;
  virtual void SetQuantParams(const std::vector<QuantParam> &quant_param) = 0;
  virtual void SetDeviceData(void *data) = 0;
  virtual void *GetDeviceData() = 0;
};
using MutableTensorImplPtr = std::shared_ptr<MutableTensorImpl>;
}  // namespace mindspore
#endif  // MINDSPORE_LITE_SRC_COMMON_MUTABLE_TESNOR_IMPL_H_
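A rough sketch of how this interface is meant to be reached from the C++ API layer: the MSTensor front end downcasts its Impl to MutableTensorImpl and forwards the Lite-only setters. This is an assumption drawn from the SetData/SetDeviceData dispatch shown later in this commit, not code from the commit itself (the include path below is also an assumption):

```cpp
// Hedged sketch, not part of the commit.
#include <memory>
#include "src/common/mutable_tensor_impl.h"  // assumed location of the header added above

void AttachDeviceData(const std::shared_ptr<mindspore::MSTensor::Impl> &impl, void *device_ptr) {
  auto mutable_impl = std::static_pointer_cast<mindspore::MutableTensorImpl>(impl);
  mutable_impl->SetDeviceData(device_ptr);              // backend stores the device pointer
  mutable_impl->SetData(nullptr, /*own_data=*/false);   // host data not owned in this sketch
}
```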
@@ -89,21 +89,10 @@ std::vector<MSTensor> ModelImpl::GetInputs() {
  std::vector<MSTensor> inputs;

  auto graph_inputs = session_->GetInputs();
  auto graph_input_names = session_->GetInputNames();

  for (size_t i = 0; i < graph_inputs.size(); i++) {
    auto graph_input = graph_inputs[i];
    std::string graph_input_name = graph_input_names[i];
    auto type_id = graph_input->data_type_c();
    auto data_type = static_cast<mindspore::DataType>(type_id);
    auto ms_tensor_ptr = MSTensor::CreateRefTensor(graph_input_name, data_type, graph_input->shape_c(),
                                                   graph_input->data_c(), graph_input->Size());
    if (ms_tensor_ptr == nullptr) {
      MS_LOG_WARNING << "Failed to create input tensor ";
      return {};
    }
    inputs.push_back(*ms_tensor_ptr);
    delete ms_tensor_ptr;
    auto tensor_impl = graph_inputs[i];
    inputs.push_back(MSTensor(tensor_impl));
  }
  return inputs;
}

@@ -111,23 +100,10 @@ std::vector<MSTensor> ModelImpl::GetInputs() {
std::vector<MSTensor> ModelImpl::GetOutputs() {
  MS_EXCEPTION_IF_NULL(session_);
  std::vector<MSTensor> outputs;

  auto graph_outputs = session_->GetOutputs();
  auto graph_output_names = session_->GetOutputNames();

  for (size_t i = 0; i < graph_outputs.size(); i++) {
    auto graph_output = graph_outputs[i];
    std::string graph_output_name = graph_output_names[i];
    auto type_id = graph_output->data_type_c();
    auto data_type = static_cast<mindspore::DataType>(type_id);
    auto ms_tensor_ptr = MSTensor::CreateRefTensor(graph_output_name, data_type, graph_output->shape_c(),
                                                   graph_output->data_c(), graph_output->Size());
    if (ms_tensor_ptr == nullptr) {
      MS_LOG_WARNING << "Failed to create output tensor ";
      return {};
    }
    outputs.push_back(*ms_tensor_ptr);
    delete ms_tensor_ptr;
    auto tensor_impl = graph_outputs[i];
    outputs.push_back(MSTensor(tensor_impl));
  }
  return outputs;
}

@@ -137,17 +113,12 @@ MSTensor ModelImpl::GetInputByTensorName(const std::string &name) {
    MS_LOG(ERROR) << "Session is null.";
    return MSTensor(nullptr);
  }
  auto tensor_ptr = session_->GetInputByTensorName(name);
  if (tensor_ptr == nullptr) {
  auto tensor_impl = session_->GetInputByTensorName(name);
  if (tensor_impl == nullptr) {
    MS_LOG(ERROR) << "Model does not contains tensor " << name << " .";
    return MSTensor(nullptr);
  }
  auto ms_inputs = TensorUtils::TensorPtrToMSTensor({tensor_ptr}, {name});
  if (ms_inputs.empty()) {
    MS_LOG(ERROR) << "Tensor to ms tensor failed." << name << " .";
    return MSTensor(nullptr);
  }
  return ms_inputs[0];
  return MSTensor(tensor_impl);
}

std::vector<std::string> ModelImpl::GetOutputTensorNames() {

@@ -164,35 +135,58 @@ MSTensor ModelImpl::GetOutputByTensorName(const std::string &name) {
    MS_LOG(ERROR) << "Session is null.";
    return MSTensor(nullptr);
  }
  auto tensor_ptr = session_->GetOutputByTensorName(name);
  if (tensor_ptr == nullptr) {
  auto tensor_impl = session_->GetOutputByTensorName(name);
  if (tensor_impl == nullptr) {
    MS_LOG(ERROR) << "Model does not contains tensor " << name << " .";
    return MSTensor(nullptr);
  }
  auto ms_outputs = TensorUtils::TensorPtrToMSTensor({tensor_ptr}, {name});
  if (ms_outputs.empty()) {
    MS_LOG(ERROR) << "Tensor to ms tensor failed." << name << " .";
    return MSTensor(nullptr);
  }
  return ms_outputs[0];
  return MSTensor(tensor_impl);
}

Status ModelImpl::Predict(const std::vector<MSTensor> &inputs, std::vector<MSTensor> *outputs) {
  MS_EXCEPTION_IF_NULL(session_);
  MS_EXCEPTION_IF_NULL(outputs);
  outputs->clear();
  std::vector<mindspore::tensor::TensorPtr> graph_inputs = TensorUtils::MSTensorToTensorPtr(inputs);
  std::vector<mindspore::tensor::TensorPtr> graph_outputs;
  std::vector<mindspore::tensor::Tensor> graph_inputs = TensorUtils::MSTensorToTensor(inputs);
  std::vector<mindspore::tensor::Tensor> graph_outputs;
  std::vector<mindspore::tensor::Tensor> org_graph_outputs;
  if (!outputs->empty()) {
    graph_outputs = TensorUtils::MSTensorToTensor(*outputs);
    org_graph_outputs = graph_outputs;
  }
  auto ret = session_->RunGraph(graph_inputs, &graph_outputs);
  if (ret != kSuccess) {
    MS_LOG(ERROR) << "ModelImpl::Predict RunGraph failed with " << ret;
    return ret;
  }
  auto ms_outputs = TensorUtils::TensorPtrToMSTensor(graph_outputs, session_->GetOutputNames());
  (void)std::copy(ms_outputs.begin(), ms_outputs.end(), std::back_inserter(*outputs));
  if (outputs->empty() || org_graph_outputs != graph_outputs) {
    *outputs = TensorUtils::TensorToMSTensor(graph_outputs, session_->GetOutputNames());
  }
  auto session_outputs = GetOutputs();
  if (graph_outputs.size() != session_outputs.size()) {
    MS_LOG(ERROR) << "Outputs count get from session " << session_outputs.size() << " != outputs count of RunGraph "
                  << graph_outputs.size();
    return kCoreFailed;
  }
  for (size_t i = 0; i < session_outputs.size(); i++) {
    auto &session_output = session_outputs[i];
    auto &execute_output = outputs->at(i);
    session_output.SetShape(execute_output.Shape());
    if (session_output.Data().get() != execute_output.Data().get()) {
      session_output.SetData(execute_output.MutableData(), false);
    }
    if (session_output.GetDeviceData() != execute_output.GetDeviceData()) {
      session_output.SetDeviceData(execute_output.GetDeviceData());
    }
  }
  return kSuccess;
}

Status ModelImpl::Predict() {
  auto inputs = GetInputs();
  auto outputs = GetOutputs();
  return Predict(inputs, &outputs);
}

bool ModelImpl::HasPreprocess() { return graph_->graph_data_->GetPreprocess().empty() ? false : true; }

Status ModelImpl::Preprocess(const std::vector<std::vector<MSTensor>> &inputs, std::vector<MSTensor> *outputs) {
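Because GetInputs()/GetOutputs() now wrap the session's tensor impls directly, a caller can bind its own output tensors once and let Predict fill them in place. An illustrative sketch against the public Model API (not part of the commit; buffer filling is left to the application):

```cpp
// Illustrative sketch, assuming the public Model API from include/api/model.h.
#include <vector>
#include "include/api/model.h"

mindspore::Status RunOnce(mindspore::Model *model) {
  std::vector<mindspore::MSTensor> inputs = model->GetInputs();
  std::vector<mindspore::MSTensor> outputs = model->GetOutputs();  // may already carry device data
  // ... fill inputs[i].MutableData() with application data here ...
  return model->Predict(inputs, &outputs);  // outputs reuse the bound buffers when shapes match
}
```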
@@ -200,16 +200,21 @@ Status ModelWorker::Predict(const std::vector<MSTensor> &inputs, std::vector<MST
  bool need_copy_output = true;
  auto model_output = model_->GetOutputs();
  for (size_t i = 0; i < outputs->size(); i++) {
    if (outputs->at(i).Data() != nullptr) {
    auto &output = outputs->at(i);
    if (output.Data() != nullptr || output.GetDeviceData() != nullptr) {
      /* user set graph-output-tensor from outside */
      model_output[i].SetData(outputs->at(i).MutableData());
      model_output[i].SetShape(output.Shape());
      model_output[i].SetData(output.MutableData(), false);
      model_output[i].SetDeviceData(output.GetDeviceData());
      model_output[i].SetAllocator(nullptr);
      need_copy_output = false;
    }
  }
  for (size_t i = 0; i < inputs.size(); i++) {
    model_input[i].SetData(const_cast<MSTensor &>(inputs[i]).MutableData());
    model_input[i].SetShape(inputs[i].Shape());
    auto &input = inputs[i];
    model_input[i].SetShape(input.Shape());
    model_input[i].SetData(const_cast<MSTensor &>(input).MutableData(), false);
    model_input[i].SetDeviceData(const_cast<MSTensor &>(input).GetDeviceData());
  }
  auto status = model_->Predict(model_input, &model_output, before, after);
  if (status != kSuccess) {

@@ -232,6 +237,7 @@ Status ModelWorker::Predict(const std::vector<MSTensor> &inputs, std::vector<MST
    for (size_t i = 0; i < outputs->size(); i++) {
      outputs->at(i).SetShape(model_output[i].Shape());
      model_output[i].SetData(nullptr);
      model_output[i].SetDeviceData(nullptr);
      model_output[i].SetAllocator(nullptr);
    }
  }
@@ -20,6 +20,11 @@
#include "mindspore/core/ir/api_tensor_impl.h"
#include "mindspore/core/utils/convert_utils_base.h"
#include "utils/file_utils.h"
#include "common/utils.h"
#include "mindspore/core/ir/tensor.h"
#include "runtime/device/device_address.h"
#include "extendrt/utils/tensor_utils.h"
#include "extendrt/utils/tensor_default_impl.h"

namespace mindspore {
class Buffer::Impl {

@@ -71,105 +76,12 @@ class Buffer::Impl {
  std::vector<uint8_t> data_;
};

class MutableTensorImpl : public MSTensor::Impl {
 public:
  MutableTensorImpl() = default;
  MutableTensorImpl(const std::string &name, enum DataType type, const std::vector<int64_t> &shape)
      : name_(name), type_(type), shape_(shape) {}

  virtual void SetData(void *data) = 0;

  void SetShape(const std::vector<int64_t> &shape) { shape_ = shape; }
  void SetDataType(mindspore::DataType data_type) { type_ = data_type; }
  void SetTensorName(const std::string &name) { name_ = name; }

  mindspore::Format GetFormat() const { return format_; }
  void SetFormat(mindspore::Format format) { format_ = format; }

  const std::string &Name() const override { return name_; }
  enum DataType DataType() const override { return type_; }
  const std::vector<int64_t> &Shape() const override { return shape_; }

  void SetAllocator(const std::shared_ptr<Allocator> &allocator) { allocator_ = allocator; }
  std::shared_ptr<Allocator> GetAllocator() const { return allocator_; }

  std::vector<QuantParam> QuantParams() const { return quant_param_; }

  void SetQuantParams(const std::vector<QuantParam> &quant_param) { quant_param_ = quant_param; }

 protected:
  std::string name_;
  enum DataType type_ = DataType::kTypeUnknown;
  enum Format format_ = mindspore::NCHW;
  std::vector<int64_t> shape_;
  std::shared_ptr<Allocator> allocator_ = nullptr;
  std::vector<QuantParam> quant_param_;
};

class TensorDefaultImpl : public MutableTensorImpl {
 public:
  TensorDefaultImpl() : buffer_() {}
  ~TensorDefaultImpl() override = default;
  TensorDefaultImpl(const std::string &name, enum DataType type, const std::vector<int64_t> &shape, const void *data,
                    size_t data_len)
      : MutableTensorImpl(name, type, shape), buffer_(data, data_len) {}

  std::shared_ptr<const void> Data() const override {
    return std::shared_ptr<const void>(buffer_.Data(), [](const void *) {});
  }

  void SetData(void *data) override {
    auto data_len = buffer_.DataSize();
    buffer_.SetData(data, data_len);
  }

  void *MutableData() override { return buffer_.MutableData(); }
  size_t DataSize() const override { return buffer_.DataSize(); }

  bool IsDevice() const override { return false; }

  std::shared_ptr<Impl> Clone() const override {
    return std::make_shared<TensorDefaultImpl>(name_, type_, shape_, buffer_.Data(), buffer_.DataSize());
  }

 private:
  Buffer buffer_;
};

class TensorReferenceImpl : public MutableTensorImpl {
 public:
  TensorReferenceImpl() = default;
  ~TensorReferenceImpl() override = default;
  TensorReferenceImpl(const std::string &name, enum DataType type, const std::vector<int64_t> &shape, const void *data,
                      size_t data_len, bool is_device)
      : MutableTensorImpl(name, type, shape), data_(data), data_size_(data_len), is_device_(is_device) {}

  std::shared_ptr<const void> Data() const override {
    return std::shared_ptr<const void>(data_, [](const void *) {});
  }

  void SetData(void *data) override { data_ = data; }

  void *MutableData() override { return const_cast<void *>(data_); }
  size_t DataSize() const override { return data_size_; }

  bool IsDevice() const override { return is_device_; }

  std::shared_ptr<Impl> Clone() const override {
    return std::make_shared<TensorReferenceImpl>(name_, type_, shape_, data_, data_size_, is_device_);
  }

 protected:
  const void *data_ = nullptr;
  size_t data_size_ = 0;
  bool is_device_ = false;
};

MSTensor *MSTensor::CreateTensor(const std::vector<char> &name, enum DataType type, const std::vector<int64_t> &shape,
                                 const void *data, size_t data_len) noexcept {
  std::string name_str = CharToString(name);
  try {
    std::shared_ptr<Impl> impl = std::make_shared<TensorDefaultImpl>(name_str, type, shape, data, data_len);
    std::shared_ptr<Impl> impl =
        std::make_shared<TensorDefaultImpl>(name_str, type, shape, data, data_len, false, false);
    MSTensor *ret = new MSTensor(impl);
    return ret;
  } catch (const std::bad_alloc &) {

@@ -182,10 +94,17 @@ MSTensor *MSTensor::CreateTensor(const std::vector<char> &name, enum DataType ty
}

MSTensor *MSTensor::CreateRefTensor(const std::vector<char> &name, enum DataType type,
                                    const std::vector<int64_t> &shape, const void *data, size_t data_len) noexcept {
                                    const std::vector<int64_t> &shape, const void *data, size_t data_len,
                                    bool own_data) noexcept {
  std::string name_str = CharToString(name);
  try {
    std::shared_ptr<Impl> impl = std::make_shared<TensorReferenceImpl>(name_str, type, shape, data, data_len, false);
    std::shared_ptr<Impl> impl =
        std::make_shared<TensorDefaultImpl>(name_str, type, shape, data, data_len, true, own_data);
    if (data_len < impl->DataSize()) {
      MS_LOG(ERROR) << "The size " << data_len << " of data cannot be less that the memory size required by the shape "
                    << shape << " and data type " << TypeIdToString(static_cast<enum TypeId>(type));
      return nullptr;
    }
    MSTensor *ret = new MSTensor(impl);
    return ret;
  } catch (const std::bad_alloc &) {

@@ -197,19 +116,24 @@ MSTensor *MSTensor::CreateRefTensor(const std::vector<char> &name, enum DataType
  }
}

MSTensor *MSTensor::CreateDevTensor(const std::vector<char> &name, enum DataType type,
                                    const std::vector<int64_t> &shape, const void *data, size_t data_len) noexcept {
MSTensor MSTensor::CreateDeviceTensor(const std::vector<char> &name, enum DataType type,
                                      const std::vector<int64_t> &shape, void *data, size_t data_size) noexcept {
  std::string name_str = CharToString(name);
  try {
    std::shared_ptr<Impl> impl = std::make_shared<TensorReferenceImpl>(name_str, type, shape, data, data_len, true);
    MSTensor *ret = new MSTensor(impl);
    return ret;
    auto impl = std::make_shared<TensorDefaultImpl>(name_str, type, shape);
    if (data_size < impl->DataSize()) {
      MS_LOG(ERROR) << "The size " << data_size << " of data cannot be less that the memory size required by the shape "
                    << shape << " and data type " << TypeIdToString(static_cast<enum TypeId>(type));
      return MSTensor(nullptr);
    }
    impl->SetDeviceData(data);
    return MSTensor(impl);
  } catch (const std::bad_alloc &) {
    MS_LOG(ERROR) << "Malloc memory failed.";
    return nullptr;
    return MSTensor(nullptr);
  } catch (...) {
    MS_LOG(ERROR) << "Unknown error occurred.";
    return nullptr;
    return MSTensor(nullptr);
  }
}

@@ -399,13 +323,17 @@ MSTensor::MSTensor(std::nullptr_t) : impl_(nullptr) {}
MSTensor::MSTensor(const std::shared_ptr<Impl> &impl) : impl_(impl) { MS_EXCEPTION_IF_NULL(impl); }
MSTensor::MSTensor(const std::vector<char> &name, enum DataType type, const std::vector<int64_t> &shape,
                   const void *data, size_t data_len)
    : impl_(std::make_shared<TensorDefaultImpl>(CharToString(name), type, shape, data, data_len)) {}
    : impl_(std::make_shared<TensorDefaultImpl>(CharToString(name), type, shape, data, data_len, false, false)) {}
MSTensor::~MSTensor() = default;

bool MSTensor::operator==(std::nullptr_t) const { return impl_ == nullptr; }

bool MSTensor::operator!=(std::nullptr_t) const { return impl_ != nullptr; }

bool MSTensor::operator==(const MSTensor &tensor) const { return impl_ == tensor.impl_; }

bool MSTensor::operator!=(const MSTensor &tensor) const { return impl_ != tensor.impl_; }

MSTensor *MSTensor::Clone() const {
  MS_EXCEPTION_IF_NULL(impl_);
  try {

@@ -478,7 +406,7 @@ void MSTensor::SetDataType(enum DataType data_type) {

void MSTensor::SetTensorName(const std::vector<char> &tensor_name) {
  MS_EXCEPTION_IF_NULL(impl_);
  std::static_pointer_cast<MutableTensorImpl>(impl_)->SetTensorName(CharToString(tensor_name));
  std::static_pointer_cast<MutableTensorImpl>(impl_)->SetName(CharToString(tensor_name));
}

void MSTensor::SetAllocator(std::shared_ptr<Allocator> allocator) {

@@ -498,17 +426,27 @@ void MSTensor::SetFormat(mindspore::Format format) {

mindspore::Format MSTensor::format() const {
  MS_EXCEPTION_IF_NULL(impl_);
  return std::static_pointer_cast<MutableTensorImpl>(impl_)->GetFormat();
  return std::static_pointer_cast<MutableTensorImpl>(impl_)->Format();
}

void MSTensor::SetData(void *data) {
void MSTensor::SetData(void *data, bool own_data) {
  MS_EXCEPTION_IF_NULL(impl_);
  std::static_pointer_cast<MutableTensorImpl>(impl_)->SetData(data);
  std::static_pointer_cast<MutableTensorImpl>(impl_)->SetData(data, own_data);
}

void MSTensor::SetDeviceData(void *data) {
  MS_EXCEPTION_IF_NULL(impl_);
  std::static_pointer_cast<MutableTensorImpl>(impl_)->SetDeviceData(data);
}

void *MSTensor::GetDeviceData() {
  MS_EXCEPTION_IF_NULL(impl_);
  return std::static_pointer_cast<MutableTensorImpl>(impl_)->GetDeviceData();
}

std::vector<QuantParam> MSTensor::QuantParams() const {
  MS_EXCEPTION_IF_NULL(impl_);
  return std::static_pointer_cast<MutableTensorImpl>(impl_)->QuantParams();
  return std::static_pointer_cast<MutableTensorImpl>(impl_)->GetQuantParams();
}

void MSTensor::SetQuantParams(std::vector<QuantParam> quant_param) {
@@ -15,9 +15,10 @@
 */

#include "src/extendrt/delegate/tensorrt/distribution/distribution_base.h"
#include "src/extendrt/delegate/plugin/tensorrt_executor_plugin.h"

namespace mindspore::lite {
int GetGPUGroupSize() { return 1; }
int GetGPUGroupSize() { return TensorRTPlugin::GetInstance().GetGPUGroupSize(); }

int GetRankID() { return 0; }
int GetRankID() { return TensorRTPlugin::GetInstance().GetRankID(); }
}  // namespace mindspore::lite
@@ -38,13 +38,29 @@ class TensorInfoImpl {
        tensor_val_(tensor_val) {
    is_const_ = (data_ != nullptr);
    if (data_ == nullptr || data_len_ == 0) {
      auto ele_num = std::accumulate(shape_.begin(), shape_.end(), 1, std::multiplies<int64_t>());
      auto type_size = DataTypeSize(static_cast<enum TypeId>(dType_));
      auto ele_num = ElementNum();
      auto type_size = item_size();
      temp_data_.resize(ele_num * type_size);
      data_ = temp_data_.data();
      data_len_ = temp_data_.size();
    }
  }
  void SetShape(const std::vector<int64_t> &shape) {
    shape_ = shape;
    auto new_elem_num = ElementNum();
    auto type_size = item_size();
    auto data_size = new_elem_num * type_size;
    if (data_size != temp_data_.size() && data_ == temp_data_.data()) {
      temp_data_.resize(data_size);
      data_ = temp_data_.data();
      data_len_ = data_size;
    }
  }

  int64_t ElementNum() const { return std::accumulate(shape_.begin(), shape_.end(), 1, std::multiplies<int64_t>()); }

  size_t item_size() const { return DataTypeSize(static_cast<enum TypeId>(dType_)); }

  std::string name_;
  mindspore::DataType dType_ = mindspore::DataType::kTypeUnknown;
  std::vector<int64_t> shape_;

@@ -109,7 +125,7 @@ size_t TensorInfo::DataSize() const {
  if (impl_ == nullptr) {
    return 0;
  }
  return impl_->data_len_;
  return ElementNum() * item_size();
}

bool TensorInfo::IsConst() const {

@@ -119,13 +135,18 @@ bool TensorInfo::IsConst() const {
  return impl_->is_const_ && impl_->data_ != nullptr;
}

size_t TensorInfo::item_size() const { return DataTypeSize(static_cast<enum TypeId>(DataType())); }
size_t TensorInfo::item_size() const {
  if (impl_ == nullptr) {
    return 0;
  }
  return impl_->item_size();
}

void TensorInfo::SetShape(const std::vector<int64_t> &shape) {
  if (impl_ == nullptr) {
    return;
  }
  impl_->shape_ = shape;
  impl_->SetShape(shape);
}

void TensorInfo::SetData(const void *data, size_t data_len) {

@@ -140,11 +161,7 @@ int64_t TensorInfo::ElementNum() const {
  if (impl_ == nullptr) {
    return 0;
  }
  if (impl_->shape_.empty()) {
    // element number of scalar is 1
    return 1;
  }
  return std::accumulate(impl_->shape_.begin(), impl_->shape_.end(), 1, std::multiplies<int64_t>());
  return impl_->ElementNum();
}

TensorInfo &TensorInfo::operator=(const TensorInfo &other) {
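With this change DataSize() is recomputed from the element count and element size, so it stays consistent after SetShape() resizes the internally owned buffer. A hedged illustration (namespace and header of TensorInfo omitted; this is not code from the commit):

```cpp
// Hedged sketch of how the recomputed size is expected to behave.
void ResizeInfoSketch(TensorInfo *info) {
  info->SetShape({2, 4});            // resizes the internally owned buffer when the impl owns one
  size_t bytes = info->DataSize();   // now ElementNum() * item_size(), i.e. 8 * item_size() here
  (void)bytes;
}
```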
@@ -106,6 +106,33 @@ int TensorRTAllocator::SyncMemDeviceToHost(tensor::Tensor *host_tensor, const st
  return SyncMemInHostAndDevice(host_tensor->data_c(), device_tensor_name, host_tensor->Size(), false, sync);
}

int TensorRTAllocator::SyncMemDeviceToHost(void *dst_data, size_t data_size, const std::string &device_tensor_name) {
  if (dst_data == nullptr) {
    MS_LOG(ERROR) << " dst host data cannot be nullptr.";
    return RET_ERROR;
  }
  auto it = cuda_tensor_map_.find(device_tensor_name);
  if (it == cuda_tensor_map_.end()) {
    MS_LOG(ERROR) << " cannot find device address " << device_tensor_name;
    return RET_ERROR;
  }
  CudaTensorParam &current_cuda_tensor = it->second;
  // is memcpy from device to host, the host mem is valid, change tag for mem pool.
  current_cuda_tensor.is_valid_mem = true;
  auto device_ptr = current_cuda_tensor.data;
  if (device_ptr == nullptr) {
    MS_LOG(ERROR) << "device_ptr is null for " << device_tensor_name;
    return RET_ERROR;
  }
  auto cuda_ret = cudaMemcpy(dst_data, device_ptr, data_size, cudaMemcpyDeviceToHost);
  if (cuda_ret != cudaSuccess) {
    MS_LOG(ERROR) << "copy mem failed,ret " << cudaGetErrorName(cuda_ret);
    return RET_ERROR;
  }
  MS_LOG(INFO) << "cuda memcpy success for " << device_tensor_name;
  return RET_OK;
}

int TensorRTAllocator::SyncMemInHostAndDevice(tensor::Tensor *host_tensor, const std::string &device_tensor_name,
                                              bool is_host2device, bool sync) {
  if (host_tensor == NULL) {

@@ -54,6 +54,7 @@ class TensorRTAllocator {

  int SyncMemHostToDevice(const tensor::Tensor &host_tensor, const std::string &device_tensor_name, bool sync = true);
  int SyncMemDeviceToHost(tensor::Tensor *host_tensor, const std::string &device_tensor_name, bool sync = true);
  int SyncMemDeviceToHost(void *dst_data, size_t data_size, const std::string &device_tensor_name);

  int ClearDeviceMem();
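The new overload copies a named device binding straight into caller-provided host memory. A hedged usage sketch, assuming an allocator instance, binding name, and header path supplied by the caller (not part of the commit):

```cpp
// Hedged sketch, not part of the commit; the include path is an assumption.
#include <string>
#include <vector>
#include "src/extendrt/delegate/tensorrt/tensorrt_allocator.h"

int CopyOutputToHost(mindspore::lite::TensorRTAllocator *allocator, const std::string &binding_name,
                     std::vector<float> *host) {
  // Copies device memory registered under binding_name into the caller's vector.
  return allocator->SyncMemDeviceToHost(host->data(), host->size() * sizeof(float), binding_name);
}
```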
@@ -295,29 +295,19 @@ Status GetModelOutputsInfo(KernelGraphPtr kernel_graph, std::vector<NodeWithOutp
  MS_EXCEPTION_IF_NULL(kernel_graph);
  MS_EXCEPTION_IF_NULL(tensor_info_list_ptr);
  auto &tensor_info_list = *tensor_info_list_ptr;
  auto kernel_graph_outputs = kernel_graph->outputs();
  auto outputs = kernel_graph->outputs();
  // find parameters of graph inputs
  for (size_t i = 0; i < kernel_graph_outputs.size(); ++i) {
    auto output = kernel_graph_outputs[i];
  for (size_t i = 0; i < outputs.size(); ++i) {
    auto output = outputs[i];
    auto cur_abstract = output->abstract();
    size_t output_num = 1;
    if (cur_abstract->isa<abstract::AbstractTuple>()) {
      auto abs_tuple = cur_abstract->Clone()->cast<abstract::AbstractTuplePtr>();
      MS_EXCEPTION_IF_NULL(abs_tuple);
      size_t output_num = abs_tuple->elements().size();
      for (size_t output_idx = 0; output_idx < output_num; ++output_idx) {
        auto tensor_id = common::AnfAlgo::VisitKernelWithReturnType(output, output_idx);
        auto it =
          std::find_if(tensor_info_list.begin(), tensor_info_list.end(),
                       [&tensor_id](const NodeWithOutputIndex &index) { return index.kernel_index == tensor_id; });
        if (it != tensor_info_list.end()) {
          output_tensors->push_back(it->tensor_info);
        } else {
          MS_LOG_ERROR << "Cannot find output tensor info " << tensor_id.first->fullname_with_scope();
          return mindspore::kLiteError;
        }
      }
    } else {
      auto tensor_id = common::AnfAlgo::VisitKernelWithReturnType(output, 0);
      output_num = abs_tuple->elements().size();
    }
    for (size_t output_idx = 0; output_idx < output_num; ++output_idx) {
      auto tensor_id = common::AnfAlgo::VisitKernelWithReturnType(output, output_idx);
      auto it =
        std::find_if(tensor_info_list.begin(), tensor_info_list.end(),
                     [&tensor_id](const NodeWithOutputIndex &index) { return index.kernel_index == tensor_id; });

@@ -443,12 +433,6 @@ Status TensorRTExecutor::BuildSubGraph(const KernelGraphPtr &kernel_graph) {
  if (status != kSuccess) {
    return status;
  }
  auto build_trt_graph = [kernel_graph](const std::vector<TensorRTOp *> &tensorrt_ops) {
    auto inputs = GraphInTensors<TensorRTOp>(tensorrt_ops);
    auto outputs = GraphOutTensors<TensorRTOp>(tensorrt_ops);
    auto ctx = TrtGraphContext{tensorrt_ops, inputs, outputs, nullptr};
    return ctx;
  };
  for (const auto &kernel_node : kernel_nodes) {
    auto node_name = kernel_node->fullname_with_scope();
    std::string kernel_name = common::AnfAlgo::GetCNodeName(kernel_node);

@@ -468,11 +452,11 @@ Status TensorRTExecutor::BuildSubGraph(const KernelGraphPtr &kernel_graph) {
    tensorrt_op->SetRuntime(this->runtime_);
    tensorrt_ops.push_back(tensorrt_op);
  }
  if (!tensorrt_ops.empty()) {
    auto trt_ctx = build_trt_graph(tensorrt_ops);
    tensorrt_ops.clear();
    tensorrt_graph_list_.push_back(trt_ctx);
    status = GetModelOutputsInfo(kernel_graph, &tensor_info_list, &outputs_);
    if (status != kSuccess) {
      return status;
    }
    tensorrt_graph_list_.push_back(TrtGraphContext{tensorrt_ops, inputs_, outputs_, nullptr});
    status = UpdateTrtSubGraphInputsDepend();
    if (status != kSuccess) {
      return status;

@@ -486,10 +470,6 @@ Status TensorRTExecutor::BuildSubGraph(const KernelGraphPtr &kernel_graph) {
      return mindspore::kLiteError;
    }
  }
  status = GetModelOutputsInfo(kernel_graph, &tensor_info_list, &outputs_);
  if (status != kSuccess) {
    return status;
  }
  return mindspore::kSuccess;
}

@@ -625,20 +605,27 @@ bool TensorRTExecutor::RunGraph(const FuncGraphPtr &graph, const std::vector<ten
    MS_LOG(ERROR) << "TensorRTGraph is nullptr.";
    return false;
  }
  tensor_val_map_.clear();
  if (inputs.size() != inputs_.size()) {
    MS_LOG(ERROR) << "Graph inputs size " << inputs_.size() << " != execute input size " << inputs.size();
    MS_LOG(ERROR) << "Graph inputs size " << inputs_.size() << " != execute outputs size " << inputs.size();
    return false;
  }
  if (!outputs->empty() && outputs_.size() != outputs->size()) {
    MS_LOG(ERROR) << "Graph outputs size " << inputs_.size() << " != expected outputs size " << outputs->size();
    return false;
  }
  if (tensorrt_graph_list_.size() == 1) {
    return tensorrt_graph_list_[0].sub_graph->Execute(inputs, outputs) == RET_OK;
  }
  std::map<TensorInfo, std::shared_ptr<tensor::Tensor>> tensor_val_map;
  for (size_t i = 0; i < inputs.size(); i++) {
    tensor_val_map_[inputs_[i]] = std::make_shared<tensor::Tensor>(inputs[i]);
    tensor_val_map[inputs_[i]] = std::make_shared<tensor::Tensor>(inputs[i]);
  }
  for (auto &sub_graph : tensorrt_graph_list_) {
    std::vector<tensor::Tensor> sub_inputs;
    std::vector<tensor::Tensor> sub_outputs;
    for (auto &item : sub_graph.inputs) {
      auto it = tensor_val_map_.find(item);
      if (it == tensor_val_map_.end()) {
      auto it = tensor_val_map.find(item);
      if (it == tensor_val_map.end()) {
        MS_LOG(ERROR) << "Cannot find input tensor " << item.Name() << " in tensor val map";
        return false;
      }

@@ -659,12 +646,13 @@ bool TensorRTExecutor::RunGraph(const FuncGraphPtr &graph, const std::vector<ten
      return false;
    }
    for (size_t i = 0; i < sub_graph.outputs.size(); i++) {
      tensor_val_map_[sub_graph.outputs[i]] = std::make_shared<tensor::Tensor>(sub_outputs[i]);
      tensor_val_map[sub_graph.outputs[i]] = std::make_shared<tensor::Tensor>(sub_outputs[i]);
    }
  }
  outputs->clear();
  for (auto &item : outputs_) {
    auto it = tensor_val_map_.find(item);
    if (it == tensor_val_map_.end()) {
    auto it = tensor_val_map.find(item);
    if (it == tensor_val_map.end()) {
      MS_LOG(ERROR) << "Cannot find input tensor " << item.Name() << " in tensor val map";
      return false;
    }
@@ -81,7 +81,6 @@ class TensorRTExecutor : public device::GraphExecutor {
  cudaStream_t stream_{nullptr};
  std::vector<kernel::Kernel> kernel_list_;

  std::map<TensorInfo, std::shared_ptr<tensor::Tensor>> tensor_val_map_;
  std::vector<TrtGraphContext> tensorrt_graph_list_;

  std::vector<nvinfer1::Dims> min_dims_;
@ -25,6 +25,7 @@
|
|||
#include <functional>
|
||||
#include <fstream>
|
||||
#include "src/extendrt/delegate/delegate_utils.h"
|
||||
#include "src/common/utils.h"
|
||||
|
||||
#include "ops/transpose.h"
|
||||
#include "ops/reshape.h"
|
||||
|
@ -482,24 +483,16 @@ int TensorRTSubGraph::Prepare() {
|
|||
return RET_ERROR;
|
||||
}
|
||||
}
|
||||
for (auto tensor : outputs_) {
|
||||
for (auto &tensor : outputs_) {
|
||||
int index = this->engine_->getBindingIndex(tensor.Name().c_str());
|
||||
auto out_dims = trt_context_->getBindingDimensions(index);
|
||||
int elem_num = std::accumulate(out_dims.d, out_dims.d + out_dims.nbDims, 1, std::multiplies<int>());
|
||||
DebugDims(out_dims);
|
||||
std::map<enum DataType, size_t> TypeByte = {
|
||||
{DataType::kTypeUnknown, 0}, {DataType::kObjectTypeString, 0}, {DataType::kNumberTypeBool, 1},
|
||||
{DataType::kNumberTypeInt8, 1}, {DataType::kNumberTypeInt16, 2}, {DataType::kNumberTypeInt32, 4},
|
||||
{DataType::kNumberTypeInt64, 8}, {DataType::kNumberTypeUInt8, 1}, {DataType::kNumberTypeUInt16, 2},
|
||||
{DataType::kNumberTypeUInt32, 4}, {DataType::kNumberTypeUInt64, 8}, {DataType::kNumberTypeFloat16, 2},
|
||||
{DataType::kNumberTypeFloat32, 4}, {DataType::kNumberTypeFloat64, 8},
|
||||
};
|
||||
if (tensor.Data() == nullptr) {
|
||||
MS_LOG(INFO) << "Set output shape by tensorrt binding output";
|
||||
tensor.SetShape(lite::ConvertMSShape(out_dims));
|
||||
tensor.MutableData();
|
||||
}
|
||||
auto device_ptr = runtime_->GetAllocator()->MallocDeviceMem(tensor, elem_num * TypeByte[tensor.DataType()]);
|
||||
auto new_shape = lite::ConvertMSShape(out_dims);
|
||||
MS_LOG(INFO) << "Set output shape of " << tensor.Name() << " to " << new_shape << " by tensorrt binding output";
|
||||
tensor.SetShape(new_shape);
|
||||
auto type_size = DataTypeSize(static_cast<enum TypeId>(tensor.DataType()));
|
||||
auto device_ptr = runtime_->GetAllocator()->MallocDeviceMem(tensor, elem_num * type_size);
|
||||
if (device_ptr == nullptr) {
|
||||
MS_LOG(ERROR) << "malloc for outputs tensor device memory failed.";
|
||||
return RET_ERROR;
|
||||
|
@ -510,37 +503,20 @@ int TensorRTSubGraph::Prepare() {
|
|||
return RET_OK;
|
||||
}
|
||||
|
||||
int TensorRTSubGraph::ReSizeIfNeed(const std::vector<tensor::Tensor> &inputs) {
|
||||
bool need_resize = false;
|
||||
int TensorRTSubGraph::OnNewInputShapes(const std::vector<tensor::Tensor> &inputs) {
|
||||
if (inputs_.size() != inputs.size()) {
|
||||
MS_LOG(ERROR) << "Graph inputs size " << inputs_.size() << " != resize input size " << inputs.size();
|
||||
return RET_ERROR;
|
||||
}
|
||||
for (size_t i = 0; i < inputs_.size(); i++) {
|
||||
if (inputs_[i].Shape() != inputs[i].shape()) {
|
||||
need_resize = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (need_resize) {
|
||||
return ReSize(inputs);
|
||||
}
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
int TensorRTSubGraph::ReSize(const std::vector<tensor::Tensor> &inputs) {
|
||||
if (inputs_.size() != inputs.size()) {
|
||||
MS_LOG(ERROR) << "Graph inputs size " << inputs_.size() << " != resize input size " << inputs.size();
|
||||
return RET_ERROR;
|
||||
}
|
||||
if (input_batchsize_index_ == -1) {
|
||||
MS_LOG(ERROR) << "current network don't support resize.";
|
||||
return RET_ERROR;
|
||||
}
|
||||
int batch_size = -1;
|
||||
for (size_t i = 0; i < trt_in_tensor_name_.size(); i++) {
|
||||
if (inputs_[i].Shape() == inputs[i].shape()) {
|
||||
continue;
|
||||
}
|
||||
if (input_batchsize_index_ == -1) {
|
||||
MS_LOG(ERROR) << "current network don't support resize.";
|
||||
return RET_ERROR;
|
||||
}
|
||||
inputs_[i].SetShape(inputs[i].shape());
|
||||
if (ctx_->network() != nullptr) {
|
||||
for (int j = 0; j < ctx_->network()->getNbInputs(); j++) {
|
||||
|
@ -558,18 +534,16 @@ int TensorRTSubGraph::ReSize(const std::vector<tensor::Tensor> &inputs) {
|
|||
|
||||
MS_LOG(INFO) << "resize at input_batch_index " << input_batchsize_index_ << ", update batch size to "
|
||||
<< inputs_[i].Shape()[input_batchsize_index_];
|
||||
runtime_->SetBatchSize(inputs_[i].Shape()[input_batchsize_index_]);
|
||||
|
||||
// inputs_ is dupulated by mindrt, name is untustable.
|
||||
auto device_ptr = runtime_->GetAllocator()->MallocDeviceMem(trt_in_tensor_name_[i], inputs_[i].DataSize(),
|
||||
ConvertDataType(inputs_[i].DataType()));
|
||||
if (device_ptr == nullptr) {
|
||||
MS_LOG(ERROR) << "realloc for input tensor device memory failed.";
|
||||
int new_batch_size = inputs_[i].Shape()[input_batchsize_index_];
|
||||
if (batch_size != -1 && batch_size != new_batch_size) {
|
||||
MS_LOG(ERROR) << "Batch size " << batch_size << " of input 0 != batch size " << new_batch_size << " of input "
|
||||
<< i;
|
||||
return RET_ERROR;
|
||||
}
|
||||
batch_size = new_batch_size;
|
||||
runtime_->SetBatchSize(batch_size);
|
||||
|
||||
int index = this->engine_->getBindingIndex(trt_in_tensor_name_[i].c_str());
|
||||
MS_LOG(INFO) << "device index " << index << " for tensor : " << trt_in_tensor_name_[i] << " attr: " << device_ptr;
|
||||
tensor_bindings_[index] = device_ptr;
|
||||
// Set actual input size
|
||||
nvinfer1::Dims input_dims = ConvertCudaDims(inputs_[i].Shape());
|
||||
for (int od = 0; od < input_dims.nbDims; od++) {
|
||||
|
@ -585,6 +559,140 @@ int TensorRTSubGraph::ReSize(const std::vector<tensor::Tensor> &inputs) {
|
|||
MS_LOG(ERROR) << "input dims need to be specified.";
|
||||
return RET_ERROR;
|
||||
}
|
||||
if (batch_size != -1) {
|
||||
for (size_t i = 0; i < trt_out_tensor_name_.size(); i++) {
|
||||
int index = this->engine_->getBindingIndex(trt_out_tensor_name_[i].c_str());
|
||||
auto out_dims = trt_context_->getBindingDimensions(index);
|
||||
auto new_shape = lite::ConvertMSShape(out_dims);
|
||||
MS_LOG(INFO) << "Set output shape of " << trt_out_tensor_name_[i] << " to " << new_shape
|
||||
<< " by tensorrt binding output";
|
||||
outputs_[i].SetShape(new_shape);
|
||||
}
|
||||
}
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
int TensorRTSubGraph::PreExecute(const std::vector<tensor::Tensor> &inputs,
|
||||
const std::vector<tensor::Tensor> &outputs) {
|
||||
if (inputs_.size() != inputs.size()) {
|
||||
MS_LOG(ERROR) << "Graph inputs size " << inputs_.size() << " != execute inputs size " << inputs.size();
|
||||
return RET_ERROR;
|
||||
}
|
||||
if (!outputs.empty() && outputs.size() != outputs_.size()) {
|
||||
MS_LOG(ERROR) << "Graph outputs size " << outputs_.size() << " != execute outputs size " << outputs.size();
|
||||
return RET_ERROR;
|
||||
}
|
||||
auto ret = OnNewInputShapes(inputs);
|
||||
if (ret != RET_OK) {
|
||||
return ret;
|
||||
}
|
||||
for (size_t i = 0; i < trt_in_tensor_name_.size(); i++) {
|
||||
auto trt_tensor_name = trt_in_tensor_name_[i];
|
||||
void *device_ptr = nullptr;
|
||||
auto input_device_address = inputs[i].device_address();
|
||||
if (input_device_address != nullptr && input_device_address->GetMutablePtr() != nullptr) {
|
||||
device_ptr = input_device_address->GetMutablePtr();
|
||||
} else {
|
||||
device_ptr = runtime_->GetAllocator()->MallocDeviceMem(trt_tensor_name, inputs_[i].DataSize(),
|
||||
ConvertDataType(inputs_[i].DataType()));
|
||||
if (device_ptr == nullptr) {
|
||||
MS_LOG(ERROR) << "realloc for input tensor device memory failed.";
|
||||
return RET_ERROR;
|
||||
}
|
||||
ret = runtime_->GetAllocator()->SyncMemHostToDevice(inputs[i], trt_tensor_name);
|
||||
if (ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "sync mem from host to device failed for " << trt_tensor_name;
|
||||
return RET_ERROR;
|
||||
}
|
||||
runtime_->GetAllocator()->MarkMemValid(trt_tensor_name, true);
|
||||
}
|
||||
int index = this->engine_->getBindingIndex(trt_tensor_name.c_str());
|
||||
MS_LOG(INFO) << "device index " << index << " for tensor : " << trt_tensor_name << " attr: " << device_ptr;
|
||||
tensor_bindings_[index] = device_ptr;
|
||||
}
|
||||
for (size_t i = 0; i < trt_out_tensor_name_.size(); i++) {
|
||||
const auto &trt_out_tensor_name = trt_out_tensor_name_[i];
|
||||
int index = this->engine_->getBindingIndex(trt_out_tensor_name.c_str());
|
||||
void *device_ptr = nullptr;
|
||||
if (outputs.size() > i) {
|
||||
auto &output = outputs[i];
|
||||
if (output.device_address() && output.device_address()->GetMutablePtr()) {
|
||||
device_ptr = output.device_address()->GetMutablePtr();
|
||||
}
|
||||
}
|
||||
if (!device_ptr) {
|
||||
device_ptr = runtime_->GetAllocator()->MallocDeviceMem(trt_out_tensor_name, outputs_[i].DataSize(),
|
||||
ConvertDataType(outputs_[i].DataType()));
|
||||
if (device_ptr == nullptr) {
|
||||
MS_LOG(ERROR) << "realloc for outputs tensor device memory failed.";
|
||||
return RET_ERROR;
|
||||
}
|
||||
}
|
||||
tensor_bindings_[index] = device_ptr;
|
||||
}
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
int TensorRTSubGraph::PostExecute(std::vector<tensor::Tensor> *outputs) {
  if (!outputs->empty() && outputs->size() != outputs_.size()) {
    MS_LOG(ERROR) << "Graph outputs size " << outputs_.size() << " != execute outputs size " << outputs->size();
    return RET_ERROR;
  }
  auto has_outputs = !outputs->empty();
  for (size_t i = 0; i < trt_out_tensor_name_.size(); i++) {
    const auto &trt_out_tensor_name = trt_out_tensor_name_[i];
    int index = this->engine_->getBindingIndex(trt_out_tensor_name.c_str());
    // actual output tensor dims
    auto out_dims = this->trt_context_->getBindingDimensions(index);
    std::vector<int64_t> new_shape = lite::ConvertMSShape(out_dims);
    // batchsize resize need set new batch size
    if (input_batchsize_index_ != -1) {
      if (runtime_->GetBatchSize() != new_shape[output_batchsize_index_]) {
        new_shape[output_batchsize_index_] = runtime_->GetBatchSize();
      }
    }
    outputs_[i].SetShape(new_shape);
    for (int od = 0; od < out_dims.nbDims; od++) {
      MS_LOG(DEBUG) << "out tensor " << trt_out_tensor_name << " dims at " << od << " is " << new_shape[od];
    }
    runtime_->GetAllocator()->MarkMemValid(trt_out_tensor_name, true);
    if (has_outputs) {
      auto &tensor = outputs->at(i);
      auto dst_device = tensor.device_address();
      if (dst_device == nullptr || dst_device->GetMutablePtr() == nullptr) {
        if (tensor.Size() < outputs_[i].DataSize()) {
          MS_LOG(ERROR) << "Parameter output data size " << tensor.Size()
                        << " cannot less than execute output data size " << outputs_[i].DataSize()
                        << ", output shape: " << new_shape;
          return RET_ERROR;
        }
        auto host_address = tensor.data_c();
        if (host_address == nullptr) {
          MS_LOG(ERROR) << "Specified output device or host address cannot be nullptr";
          return RET_ERROR;
        }
        int sync_ret =
          runtime_->GetAllocator()->SyncMemDeviceToHost(host_address, outputs_[i].DataSize(), trt_out_tensor_name);
        if (sync_ret != RET_OK) {
          MS_LOG(ERROR) << "sync mem from device to host failed for " << trt_out_tensor_name;
          return sync_ret;
        }
      }
    } else {
      tensor::Tensor output_tensor(static_cast<enum TypeId>(outputs_[i].DataType()), new_shape);
      int sync_ret = runtime_->GetAllocator()->SyncMemDeviceToHost(&output_tensor, trt_out_tensor_name);
      if (sync_ret != RET_OK) {
        MS_LOG(ERROR) << "sync mem from device to host failed for " << trt_out_tensor_name;
        return sync_ret;
      }
      outputs->push_back(output_tensor);
    }
    runtime_->GetAllocator()->MarkMemValid(trt_out_tensor_name, false);
  }
  // make mem invalid, prepare for next execute
  for (size_t i = 0; i < inputs_.size(); i++) {
    runtime_->GetAllocator()->MarkMemValid(trt_in_tensor_name_[i], false);
  }
  return RET_OK;
}
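
For orientation, a minimal caller-side sketch of how the new PreExecute/PostExecute split is meant to be driven (not part of this change; the subgraph pointer and the input tensors are assumed): an empty outputs vector makes PostExecute allocate host tensors and append them, while outputs that already carry a device address are bound directly in PreExecute and skip the device-to-host copy.

// Hedged usage sketch; `subgraph` and the prepared input tensors are assumptions.
std::vector<tensor::Tensor> exec_inputs = /* host- or device-resident inputs */ {};
std::vector<tensor::Tensor> exec_outputs;  // left empty: PostExecute fills host tensors
if (subgraph->Execute(exec_inputs, &exec_outputs) != RET_OK) {
  MS_LOG(ERROR) << "TensorRT subgraph execute failed";
}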

@@ -612,63 +720,20 @@ bool TensorRTSubGraph::ValidInputResizeDims(const nvinfer1::Dims &construct_dims
}

int TensorRTSubGraph::Execute(const std::vector<tensor::Tensor> &inputs, std::vector<tensor::Tensor> *outputs) {
  int ret = ReSizeIfNeed(inputs);
  int ret = lite::SetCudaDevice(device_info_);
  if (ret != RET_OK) {
    return ret;
  }
  ret = lite::SetCudaDevice(device_info_);
  outputs->clear();
  ret = PreExecute(inputs, *outputs);
  if (ret != RET_OK) {
    return ret;
  }
  for (size_t i = 0; i < inputs.size(); i++) {
    if (runtime_->GetAllocator()->GetMemIsValid(trt_in_tensor_name_[i])) {
      MS_LOG(INFO) << "no need memcpy to cuda for input tensor: " << trt_in_tensor_name_[i];
      continue;
    }
    ret = runtime_->GetAllocator()->SyncMemHostToDevice(inputs[i], trt_in_tensor_name_[i]);
    if (ret != RET_OK) {
      MS_LOG(ERROR) << "sync mem from host to device failed for " << trt_in_tensor_name_[i];
      return ret;
    }
    runtime_->GetAllocator()->MarkMemValid(trt_in_tensor_name_[i], true);
  }

  if (!this->trt_context_->executeV2(tensor_bindings_)) {
    MS_LOG(ERROR) << "TensorRT execute failed.";
    return RET_ERROR;
  }

  for (size_t i = 0; i < trt_out_tensor_name_.size(); i++) {
    int index = this->engine_->getBindingIndex(trt_out_tensor_name_[i].c_str());
    // actual output tensor dims
    auto out_dims = this->trt_context_->getBindingDimensions(index);
    std::vector<int64_t> new_shape = lite::ConvertMSShape(out_dims);
    // batchsize resize need set new batch size
    if (input_batchsize_index_ != -1) {
      if (runtime_->GetBatchSize() != new_shape[output_batchsize_index_]) {
        new_shape[output_batchsize_index_] = runtime_->GetBatchSize();
      }
    }
    for (int od = 0; od < out_dims.nbDims; od++) {
      MS_LOG(DEBUG) << "out tensor " << trt_out_tensor_name_[i] << " dims at " << od << " is " << new_shape[od];
    }
    tensor::Tensor output_tensor(static_cast<enum TypeId>(outputs_[i].DataType()), new_shape);
    outputs_[i].SetShape(new_shape);

    runtime_->GetAllocator()->MarkMemValid(trt_out_tensor_name_[i], true);
    int sync_ret = runtime_->GetAllocator()->SyncMemDeviceToHost(&output_tensor, trt_out_tensor_name_[i]);
    if (sync_ret != RET_OK) {
      MS_LOG(ERROR) << "sync mem from device to host failed for " << trt_out_tensor_name_[i];
      return sync_ret;
    }
    runtime_->GetAllocator()->MarkMemValid(trt_out_tensor_name_[i], false);
    outputs->push_back(output_tensor);
  }
  // make mem invalid, prepare for next execute
  for (size_t i = 0; i < inputs_.size(); i++) {
    runtime_->GetAllocator()->MarkMemValid(trt_in_tensor_name_[i], false);
  }
  return RET_OK;
  return PostExecute(outputs);
}

ITensorHelper TensorRTSubGraph::FindTensorRTInputs(TensorRTOp *cur_op, const TensorInfo &in_tensor) {

@@ -50,9 +50,6 @@ class TensorRTSubGraph {
  int Execute(const std::vector<tensor::Tensor> &inputs, std::vector<tensor::Tensor> *outputs);

  int ReSizeIfNeed(const std::vector<tensor::Tensor> &inputs);
  int ReSize(const std::vector<tensor::Tensor> &inputs);

  int BuildTensorRTGraph();

  int Init(cudaStream_t stream);
@@ -92,6 +89,11 @@ class TensorRTSubGraph {
  bool ValidInputResizeDims(const nvinfer1::Dims &construct_dims, const std::vector<int64_t> &resize_input_shape);

  int PreExecute(const std::vector<tensor::Tensor> &inputs, const std::vector<tensor::Tensor> &outputs);
  int PostExecute(std::vector<tensor::Tensor> *outputs);

  int OnNewInputShapes(const std::vector<tensor::Tensor> &inputs);

  std::string name_;
  std::vector<TensorInfo> inputs_;
  std::vector<TensorInfo> outputs_;

@ -43,15 +43,15 @@ class DefaultInferSession : public InferSession {
|
|||
Status Init(const std::shared_ptr<Context> context) override;
|
||||
Status CompileGraph(FuncGraphPtr graph, const void *data = nullptr, size_t size = 0) override;
|
||||
Status RunGraph() override;
|
||||
Status RunGraph(const std::vector<tensor::TensorPtr> &inputs, std::vector<tensor::TensorPtr> *outputs) override;
|
||||
Status RunGraph(const std::vector<tensor::Tensor> &inputs, std::vector<tensor::Tensor> *outputs) override;
|
||||
Status Resize(const std::vector<tensor::TensorPtr> &inputs, const std::vector<std::vector<int64_t>> &dims) override;
|
||||
|
||||
std::vector<tensor::TensorPtr> GetOutputs() override;
|
||||
std::vector<tensor::TensorPtr> GetInputs() override;
|
||||
std::vector<MutableTensorImplPtr> GetOutputs() override;
|
||||
std::vector<MutableTensorImplPtr> GetInputs() override;
|
||||
std::vector<std::string> GetOutputNames() override;
|
||||
std::vector<std::string> GetInputNames() override;
|
||||
tensor::TensorPtr GetOutputByTensorName(const std::string &tensorName) override;
|
||||
tensor::TensorPtr GetInputByTensorName(const std::string &name) override;
|
||||
MutableTensorImplPtr GetOutputByTensorName(const std::string &tensorName) override;
|
||||
MutableTensorImplPtr GetInputByTensorName(const std::string &name) override;
|
||||
|
||||
private:
|
||||
KernelGraphUtilsPtr kernel_graph_utils_;
|
||||
|
@ -71,20 +71,19 @@ Status DefaultInferSession::CompileGraph(FuncGraphPtr graph, const void *data, s
|
|||
}
|
||||
|
||||
Status DefaultInferSession::RunGraph() { return kSuccess; }
|
||||
Status DefaultInferSession::RunGraph(const std::vector<tensor::TensorPtr> &inputs,
|
||||
std::vector<tensor::TensorPtr> *outputs) {
|
||||
Status DefaultInferSession::RunGraph(const std::vector<tensor::Tensor> &inputs, std::vector<tensor::Tensor> *outputs) {
|
||||
return kSuccess;
|
||||
}
|
||||
Status DefaultInferSession::Resize(const std::vector<tensor::TensorPtr> &inputs,
|
||||
const std::vector<std::vector<int64_t>> &dims) {
|
||||
return kSuccess;
|
||||
}
|
||||
std::vector<tensor::TensorPtr> DefaultInferSession::GetOutputs() { return std::vector<tensor::TensorPtr>(); }
|
||||
std::vector<tensor::TensorPtr> DefaultInferSession::GetInputs() { return std::vector<tensor::TensorPtr>(); }
|
||||
std::vector<MutableTensorImplPtr> DefaultInferSession::GetOutputs() { return {}; }
|
||||
std::vector<MutableTensorImplPtr> DefaultInferSession::GetInputs() { return {}; }
|
||||
std::vector<std::string> DefaultInferSession::GetOutputNames() { return std::vector<std::string>(); }
|
||||
std::vector<std::string> DefaultInferSession::GetInputNames() { return std::vector<std::string>(); }
|
||||
tensor::TensorPtr DefaultInferSession::GetOutputByTensorName(const std::string &tensorName) { return nullptr; }
|
||||
tensor::TensorPtr DefaultInferSession::GetInputByTensorName(const std::string &name) { return nullptr; }
|
||||
MutableTensorImplPtr DefaultInferSession::GetOutputByTensorName(const std::string &tensorName) { return nullptr; }
|
||||
MutableTensorImplPtr DefaultInferSession::GetInputByTensorName(const std::string &name) { return nullptr; }
|
||||
std::shared_ptr<InferSession> InferSession::CreateSession(const std::shared_ptr<Context> context) {
|
||||
HandleGPUContext(context);
|
||||
auto config = SelectSessionArg(context);
|
||||
|
|
|
@ -27,6 +27,8 @@
|
|||
#include "ir/func_graph.h"
|
||||
#include "backend/graph_compiler/graph_partition.h"
|
||||
#include "extendrt/session/type.h"
|
||||
#include "common/mutable_tensor_impl.h"
|
||||
#include "extendrt/utils/kernel_graph_utils.h"
|
||||
|
||||
namespace mindspore {
|
||||
class InferSession : public std::enable_shared_from_this<InferSession> {
|
||||
|
@ -37,16 +39,16 @@ class InferSession : public std::enable_shared_from_this<InferSession> {
|
|||
virtual Status Init(const std::shared_ptr<Context> context) = 0;
|
||||
virtual Status CompileGraph(FuncGraphPtr graph, const void *data = nullptr, size_t size = 0) = 0;
|
||||
virtual Status RunGraph() = 0;
|
||||
virtual Status RunGraph(const std::vector<tensor::TensorPtr> &inputs, std::vector<tensor::TensorPtr> *outputs) = 0;
|
||||
virtual Status RunGraph(const std::vector<tensor::Tensor> &inputs, std::vector<tensor::Tensor> *outputs) = 0;
|
||||
virtual Status Resize(const std::vector<tensor::TensorPtr> &inputs,
|
||||
const std::vector<std::vector<int64_t>> &dims) = 0;
|
||||
|
||||
virtual std::vector<tensor::TensorPtr> GetOutputs() = 0;
|
||||
virtual std::vector<tensor::TensorPtr> GetInputs() = 0;
|
||||
virtual std::vector<MutableTensorImplPtr> GetOutputs() = 0;
|
||||
virtual std::vector<MutableTensorImplPtr> GetInputs() = 0;
|
||||
virtual std::vector<std::string> GetOutputNames() = 0;
|
||||
virtual std::vector<std::string> GetInputNames() = 0;
|
||||
virtual tensor::TensorPtr GetOutputByTensorName(const std::string &tensorName) = 0;
|
||||
virtual tensor::TensorPtr GetInputByTensorName(const std::string &name) = 0;
|
||||
virtual MutableTensorImplPtr GetOutputByTensorName(const std::string &tensorName) = 0;
|
||||
virtual MutableTensorImplPtr GetInputByTensorName(const std::string &name) = 0;
|
||||
|
||||
protected:
|
||||
FuncGraphPtr graph_;
|
||||
|
|
|
@ -28,20 +28,19 @@ Status DelegateSession::Init(const std::shared_ptr<Context> context) { return kS
|
|||
Status DelegateSession::CompileGraph(FuncGraphPtr graph, const void *data, size_t size) { return kSuccess; }
|
||||
|
||||
Status DelegateSession::RunGraph() { return kSuccess; }
|
||||
Status DelegateSession::RunGraph(const std::vector<tensor::TensorPtr> &inputs,
|
||||
std::vector<tensor::TensorPtr> *outputs) {
|
||||
Status DelegateSession::RunGraph(const std::vector<tensor::Tensor> &inputs, std::vector<tensor::Tensor> *outputs) {
|
||||
return kSuccess;
|
||||
}
|
||||
Status DelegateSession::Resize(const std::vector<tensor::TensorPtr> &inputs,
|
||||
const std::vector<std::vector<int64_t>> &dims) {
|
||||
return kSuccess;
|
||||
}
|
||||
std::vector<tensor::TensorPtr> DelegateSession::GetOutputs() { return std::vector<tensor::TensorPtr>(); }
|
||||
std::vector<tensor::TensorPtr> DelegateSession::GetInputs() { return std::vector<tensor::TensorPtr>(); }
|
||||
std::vector<MutableTensorImplPtr> DelegateSession::GetOutputs() { return {}; }
|
||||
std::vector<MutableTensorImplPtr> DelegateSession::GetInputs() { return {}; }
|
||||
std::vector<std::string> DelegateSession::GetOutputNames() { return std::vector<std::string>(); }
|
||||
std::vector<std::string> DelegateSession::GetInputNames() { return std::vector<std::string>(); }
|
||||
tensor::TensorPtr DelegateSession::GetOutputByTensorName(const std::string &tensorName) { return nullptr; }
|
||||
tensor::TensorPtr DelegateSession::GetInputByTensorName(const std::string &name) { return nullptr; }
|
||||
MutableTensorImplPtr DelegateSession::GetOutputByTensorName(const std::string &tensorName) { return nullptr; }
|
||||
MutableTensorImplPtr DelegateSession::GetInputByTensorName(const std::string &name) { return nullptr; }
|
||||
|
||||
static std::shared_ptr<InferSession> DelegateSessionCreator(const SessionConfig &config) {
|
||||
auto delegates = config.delegates_;
|
||||
|
|
|
@ -32,15 +32,15 @@ class DelegateSession : public InferSession {
|
|||
Status Init(const std::shared_ptr<Context> context) override;
|
||||
Status CompileGraph(FuncGraphPtr graph, const void *data = nullptr, size_t size = 0) override;
|
||||
Status RunGraph() override;
|
||||
Status RunGraph(const std::vector<tensor::TensorPtr> &inputs, std::vector<tensor::TensorPtr> *outputs) override;
|
||||
Status RunGraph(const std::vector<tensor::Tensor> &inputs, std::vector<tensor::Tensor> *outputs) override;
|
||||
Status Resize(const std::vector<tensor::TensorPtr> &inputs, const std::vector<std::vector<int64_t>> &dims) override;
|
||||
|
||||
std::vector<tensor::TensorPtr> GetOutputs() override;
|
||||
std::vector<tensor::TensorPtr> GetInputs() override;
|
||||
std::vector<MutableTensorImplPtr> GetOutputs() override;
|
||||
std::vector<MutableTensorImplPtr> GetInputs() override;
|
||||
std::vector<std::string> GetOutputNames() override;
|
||||
std::vector<std::string> GetInputNames() override;
|
||||
tensor::TensorPtr GetOutputByTensorName(const std::string &tensorName) override;
|
||||
tensor::TensorPtr GetInputByTensorName(const std::string &name) override;
|
||||
MutableTensorImplPtr GetOutputByTensorName(const std::string &tensorName) override;
|
||||
MutableTensorImplPtr GetInputByTensorName(const std::string &name) override;
|
||||
|
||||
private:
|
||||
std::shared_ptr<mindspore::Delegate> delegate_;
|
||||
|
|
|
@ -19,8 +19,8 @@
|
|||
#include <memory>
|
||||
|
||||
#include "extendrt/session/graph_executor_session.h"
|
||||
#include "extendrt/utils/tensor_utils.h"
|
||||
#include "src/extendrt/utils/kernel_build_utils.h"
|
||||
#include "extendrt/utils/tensor_default_impl.h"
|
||||
|
||||
namespace mindspore {
|
||||
Status GraphExecutorSession::Init(const std::shared_ptr<Context> context) {
|
||||
|
@ -38,39 +38,57 @@ Status GraphExecutorSession::CompileGraph(FuncGraphPtr graph, const void *data,
|
|||
for (const auto &kernel_node : kernel_nodes) {
|
||||
mindspore::infer::SetKernelInfo(kernel_node);
|
||||
}
|
||||
if (graph_executor_->CompileGraph(kernel_graph_, options_)) {
|
||||
kernel_graph_utils_->GetModelInputsInfo(kernel_graph_->graph_id(), &inputs_, &input_names_);
|
||||
kernel_graph_utils_->GetModelOutputsInfo(kernel_graph_->graph_id(), &outputs_, &output_names_);
|
||||
return kSuccess;
|
||||
if (!graph_executor_->CompileGraph(kernel_graph_, options_)) {
|
||||
return kCoreFailed;
|
||||
}
|
||||
return kCoreFailed;
|
||||
std::vector<tensor::TensorPtr> graph_inputs, graph_outputs;
|
||||
kernel_graph_utils_->GetModelInputsInfo(kernel_graph_->graph_id(), &graph_inputs, &input_names_);
|
||||
kernel_graph_utils_->GetModelOutputsInfo(kernel_graph_->graph_id(), &graph_outputs, &output_names_);
|
||||
if (graph_inputs.size() != input_names_.size()) {
|
||||
MS_LOG(ERROR) << "Graph input size " << graph_inputs.size() << " != input names size " << input_names_.size();
|
||||
return kCoreFailed;
|
||||
}
|
||||
if (graph_outputs.size() != output_names_.size()) {
|
||||
MS_LOG(ERROR) << "Graph output size " << graph_outputs.size() << " != output names size " << output_names_.size();
|
||||
return kCoreFailed;
|
||||
}
|
||||
for (size_t i = 0; i < input_names_.size(); i++) {
|
||||
auto &input = graph_inputs[i];
|
||||
auto data_type = static_cast<enum DataType>(input->data_type());
|
||||
auto impl = std::make_shared<TensorDefaultImpl>(input_names_[i], data_type, input->shape_c());
|
||||
inputs_.push_back(impl);
|
||||
}
|
||||
for (size_t i = 0; i < output_names_.size(); i++) {
|
||||
auto &output = graph_outputs[i];
|
||||
auto data_type = static_cast<enum DataType>(output->data_type());
|
||||
auto impl = std::make_shared<TensorDefaultImpl>(output_names_[i], data_type, output->shape_c());
|
||||
outputs_.push_back(impl);
|
||||
}
|
||||
return kSuccess;
|
||||
}
|
||||
|
||||
Status GraphExecutorSession::RunGraph() { return kSuccess; }
|
||||
Status GraphExecutorSession::RunGraph(const std::vector<tensor::TensorPtr> &inputs,
|
||||
std::vector<tensor::TensorPtr> *outputs) {
|
||||
|
||||
Status GraphExecutorSession::RunGraph(const std::vector<tensor::Tensor> &inputs, std::vector<tensor::Tensor> *outputs) {
|
||||
MS_LOG(INFO) << "GraphExecutorSession::RunGraph";
|
||||
MS_EXCEPTION_IF_NULL(graph_executor_);
|
||||
MS_EXCEPTION_IF_NULL(outputs);
|
||||
std::vector<tensor::Tensor> executor_inputs, executor_outputs;
|
||||
executor_inputs = TensorUtils::TensorPtrToTensor(inputs);
|
||||
auto ret = graph_executor_->RunGraph(kernel_graph_, executor_inputs, &executor_outputs, options_);
|
||||
auto ret = graph_executor_->RunGraph(kernel_graph_, inputs, outputs, options_);
|
||||
if (!ret) {
|
||||
return kCoreFailed;
|
||||
}
|
||||
*outputs = TensorUtils::TensorToTensorPtr(executor_outputs);
|
||||
inputs_ = inputs;
|
||||
outputs_ = *outputs;
|
||||
return kSuccess;
|
||||
}
|
||||
|
||||
Status GraphExecutorSession::Resize(const std::vector<tensor::TensorPtr> &inputs,
|
||||
const std::vector<std::vector<int64_t>> &dims) {
|
||||
return kSuccess;
|
||||
}
|
||||
std::vector<tensor::TensorPtr> GraphExecutorSession::GetOutputs() { return outputs_; }
|
||||
std::vector<tensor::TensorPtr> GraphExecutorSession::GetInputs() { return inputs_; }
|
||||
std::vector<MutableTensorImplPtr> GraphExecutorSession::GetOutputs() { return outputs_; }
|
||||
std::vector<MutableTensorImplPtr> GraphExecutorSession::GetInputs() { return inputs_; }
|
||||
std::vector<std::string> GraphExecutorSession::GetOutputNames() { return output_names_; }
|
||||
std::vector<std::string> GraphExecutorSession::GetInputNames() { return input_names_; }
|
||||
tensor::TensorPtr GraphExecutorSession::GetOutputByTensorName(const std::string &tensorName) {
|
||||
MutableTensorImplPtr GraphExecutorSession::GetOutputByTensorName(const std::string &tensorName) {
|
||||
for (size_t i = 0; i < output_names_.size(); i++) {
|
||||
if (output_names_[i] == tensorName) {
|
||||
return outputs_[i];
|
||||
|
@ -78,7 +96,7 @@ tensor::TensorPtr GraphExecutorSession::GetOutputByTensorName(const std::string
|
|||
}
|
||||
return nullptr;
|
||||
}
|
||||
tensor::TensorPtr GraphExecutorSession::GetInputByTensorName(const std::string &name) {
|
||||
MutableTensorImplPtr GraphExecutorSession::GetInputByTensorName(const std::string &name) {
|
||||
for (size_t i = 0; i < input_names_.size(); i++) {
|
||||
if (input_names_[i] == name) {
|
||||
return inputs_[i];
|
||||
|
|
|
@ -36,24 +36,24 @@ class GraphExecutorSession : public DelegateSession {
|
|||
Status Init(const std::shared_ptr<Context> context) override;
|
||||
Status CompileGraph(FuncGraphPtr graph, const void *data = nullptr, size_t size = 0) override;
|
||||
Status RunGraph() override;
|
||||
Status RunGraph(const std::vector<tensor::TensorPtr> &inputs, std::vector<tensor::TensorPtr> *outputs) override;
|
||||
Status RunGraph(const std::vector<tensor::Tensor> &inputs, std::vector<tensor::Tensor> *outputs) override;
|
||||
Status Resize(const std::vector<tensor::TensorPtr> &inputs, const std::vector<std::vector<int64_t>> &dims) override;
|
||||
|
||||
std::vector<tensor::TensorPtr> GetOutputs() override;
|
||||
std::vector<tensor::TensorPtr> GetInputs() override;
|
||||
std::vector<MutableTensorImplPtr> GetOutputs() override;
|
||||
std::vector<MutableTensorImplPtr> GetInputs() override;
|
||||
std::vector<std::string> GetOutputNames() override;
|
||||
std::vector<std::string> GetInputNames() override;
|
||||
tensor::TensorPtr GetOutputByTensorName(const std::string &tensorName) override;
|
||||
tensor::TensorPtr GetInputByTensorName(const std::string &name) override;
|
||||
MutableTensorImplPtr GetOutputByTensorName(const std::string &tensorName) override;
|
||||
MutableTensorImplPtr GetInputByTensorName(const std::string &name) override;
|
||||
|
||||
private:
|
||||
std::shared_ptr<mindspore::device::GraphExecutor> graph_executor_;
|
||||
std::map<std::string, std::string> options_;
|
||||
KernelGraphUtilsPtr kernel_graph_utils_;
|
||||
KernelGraphPtr kernel_graph_;
|
||||
std::vector<tensor::TensorPtr> inputs_;
|
||||
std::vector<MutableTensorImplPtr> inputs_;
|
||||
std::vector<std::string> input_names_;
|
||||
std::vector<tensor::TensorPtr> outputs_;
|
||||
std::vector<MutableTensorImplPtr> outputs_;
|
||||
std::vector<std::string> output_names_;
|
||||
};
|
||||
} // namespace mindspore
|
||||
|
|
|
@ -129,8 +129,7 @@ Status LiteInferSession::RunGraph() {
|
|||
auto ret = lite_session_->RunGraph();
|
||||
return static_cast<StatusCode>(ret);
|
||||
}
|
||||
Status LiteInferSession::RunGraph(const std::vector<tensor::TensorPtr> &inputs,
|
||||
std::vector<tensor::TensorPtr> *outputs) {
|
||||
Status LiteInferSession::RunGraph(const std::vector<tensor::Tensor> &inputs, std::vector<tensor::Tensor> *outputs) {
|
||||
MS_LOG(INFO) << "SingleOpInferSession::RunGraph with input and outputs";
|
||||
MS_EXCEPTION_IF_NULL(outputs);
|
||||
MS_EXCEPTION_IF_NULL(lite_session_);
|
||||
|
@ -145,7 +144,7 @@ Status LiteInferSession::RunGraph(const std::vector<tensor::TensorPtr> &inputs,
|
|||
std::vector<void *> old_data;
|
||||
for (size_t i = 0; i < inputs.size(); i++) {
|
||||
auto input = input_tensors.at(i);
|
||||
auto user_input = inputs.at(i);
|
||||
auto user_input = &inputs[i];
|
||||
if (user_input->data_type() != input->data_type()) {
|
||||
ResetTensorData(old_data, input_tensors);
|
||||
MS_LOG(EXCEPTION) << "Tensor " << user_input->id() << " has a different data type from input"
|
||||
|
@ -200,7 +199,7 @@ Status LiteInferSession::RunGraph(const std::vector<tensor::TensorPtr> &inputs,
|
|||
return kLiteError;
|
||||
}
|
||||
outputs->clear();
|
||||
*outputs = TensorUtils::MSTensorToTensorPtr(res);
|
||||
*outputs = TensorUtils::MSTensorToTensor(res);
|
||||
return kSuccess;
|
||||
}
|
||||
Status LiteInferSession::Resize(const std::vector<tensor::TensorPtr> &inputs,
|
||||
|
@ -208,37 +207,23 @@ Status LiteInferSession::Resize(const std::vector<tensor::TensorPtr> &inputs,
|
|||
return kSuccess;
|
||||
}
|
||||
|
||||
std::vector<tensor::TensorPtr> LiteInferSession::GetOutputs() {
|
||||
std::vector<MutableTensorImplPtr> LiteInferSession::GetOutputs() {
|
||||
auto outputs = lite_session_->GetOutputs();
|
||||
std::vector<tensor::TensorPtr> output_tensors;
|
||||
std::vector<MutableTensorImplPtr> output_tensors;
|
||||
for (auto &iter : outputs) {
|
||||
auto output = iter.second;
|
||||
auto type_id = output->data_type();
|
||||
auto shape = output->shape();
|
||||
ShapeVector shape_vec;
|
||||
std::transform(shape.begin(), shape.end(), std::back_inserter(shape_vec),
|
||||
[](int s) { return static_cast<int64_t>(s); });
|
||||
auto data = output->data();
|
||||
auto data_size = output->Size();
|
||||
auto tensor_ptr = std::make_shared<mindspore::tensor::Tensor>(type_id, shape_vec, data, data_size);
|
||||
output_tensors.emplace_back(tensor_ptr);
|
||||
auto impl = std::make_shared<LiteTensorImpl>(output);
|
||||
output_tensors.emplace_back(impl);
|
||||
}
|
||||
return output_tensors;
|
||||
}
|
||||
|
||||
std::vector<tensor::TensorPtr> LiteInferSession::GetInputs() {
|
||||
std::vector<MutableTensorImplPtr> LiteInferSession::GetInputs() {
|
||||
auto inputs = lite_session_->GetInputs();
|
||||
std::vector<tensor::TensorPtr> input_tensors;
|
||||
std::vector<MutableTensorImplPtr> input_tensors;
|
||||
for (auto &input : inputs) {
|
||||
auto type_id = input->data_type();
|
||||
auto shape = input->shape();
|
||||
ShapeVector shape_vec;
|
||||
std::transform(shape.begin(), shape.end(), std::back_inserter(shape_vec),
|
||||
[](int s) { return static_cast<int64_t>(s); });
|
||||
auto data = input->data();
|
||||
auto data_size = input->Size();
|
||||
auto tensor_ptr = std::make_shared<mindspore::tensor::Tensor>(type_id, shape_vec, data, data_size);
|
||||
input_tensors.emplace_back(tensor_ptr);
|
||||
auto impl = std::make_shared<LiteTensorImpl>(input);
|
||||
input_tensors.emplace_back(impl);
|
||||
}
|
||||
return input_tensors;
|
||||
}
|
||||
|
@ -252,8 +237,26 @@ std::vector<std::string> LiteInferSession::GetOutputNames() {
|
|||
}
|
||||
|
||||
std::vector<std::string> LiteInferSession::GetInputNames() { return ConvertToTensorNames(lite_session_->GetInputs()); }
|
||||
tensor::TensorPtr LiteInferSession::GetOutputByTensorName(const std::string &tensorName) { return nullptr; }
|
||||
tensor::TensorPtr LiteInferSession::GetInputByTensorName(const std::string &name) { return nullptr; }
|
||||
MutableTensorImplPtr LiteInferSession::GetOutputByTensorName(const std::string &name) {
|
||||
auto outputs = lite_session_->GetOutputs();
|
||||
for (auto &iter : outputs) {
|
||||
auto output = iter.second;
|
||||
if (output->tensor_name() == name) {
|
||||
return std::make_shared<LiteTensorImpl>(output);
|
||||
}
|
||||
}
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
MutableTensorImplPtr LiteInferSession::GetInputByTensorName(const std::string &name) {
|
||||
auto inputs = lite_session_->GetInputs();
|
||||
for (auto &input : inputs) {
|
||||
if (input->tensor_name() == name) {
|
||||
return std::make_shared<LiteTensorImpl>(input);
|
||||
}
|
||||
}
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
std::shared_ptr<lite::LiteSession> LiteInferSession::CreateLiteSession(lite::InnerContext *context) {
|
||||
auto session = std::make_shared<lite::LiteSession>();
|
||||
|
|
|
@ -32,15 +32,15 @@ class LiteInferSession : public InferSession {
|
|||
Status Init(const std::shared_ptr<Context> context) override;
|
||||
Status CompileGraph(FuncGraphPtr graph, const void *data = nullptr, size_t size = 0) override;
|
||||
Status RunGraph() override;
|
||||
Status RunGraph(const std::vector<tensor::TensorPtr> &inputs, std::vector<tensor::TensorPtr> *outputs) override;
|
||||
Status RunGraph(const std::vector<tensor::Tensor> &inputs, std::vector<tensor::Tensor> *outputs) override;
|
||||
Status Resize(const std::vector<tensor::TensorPtr> &inputs, const std::vector<std::vector<int64_t>> &dims) override;
|
||||
|
||||
std::vector<tensor::TensorPtr> GetOutputs() override;
|
||||
std::vector<tensor::TensorPtr> GetInputs() override;
|
||||
std::vector<MutableTensorImplPtr> GetOutputs() override;
|
||||
std::vector<MutableTensorImplPtr> GetInputs() override;
|
||||
std::vector<std::string> GetOutputNames() override;
|
||||
std::vector<std::string> GetInputNames() override;
|
||||
tensor::TensorPtr GetOutputByTensorName(const std::string &tensorName) override;
|
||||
tensor::TensorPtr GetInputByTensorName(const std::string &name) override;
|
||||
MutableTensorImplPtr GetOutputByTensorName(const std::string &tensorName) override;
|
||||
MutableTensorImplPtr GetInputByTensorName(const std::string &name) override;
|
||||
|
||||
private:
|
||||
std::shared_ptr<lite::LiteSession> CreateLiteSession(lite::InnerContext *context);
|
||||
|
@ -54,10 +54,6 @@ class LiteInferSession : public InferSession {
|
|||
private:
|
||||
std::shared_ptr<lite::LiteSession> lite_session_;
|
||||
std::shared_ptr<Context> context_;
|
||||
std::vector<tensor::TensorPtr> inputs_;
|
||||
std::vector<std::string> input_names_;
|
||||
std::vector<tensor::TensorPtr> outputs_;
|
||||
std::vector<std::string> output_names_;
|
||||
};
|
||||
} // namespace mindspore
|
||||
|
||||
|
|
|
@ -31,6 +31,7 @@
|
|||
#include "src/extendrt/kernel/ascend/plugin/ascend_kernel_plugin.h"
|
||||
#include "extendrt/session/factory.h"
|
||||
#include "extendrt/utils/runtime_utils.h"
|
||||
#include "extendrt/utils/tensor_default_impl.h"
|
||||
|
||||
namespace mindspore {
|
||||
const size_t tensor_max_size = 0x1000000;
|
||||
|
@ -131,15 +132,34 @@ Status SingleOpInferSession::CompileGraph(FuncGraphPtr graph, const void *data,
|
|||
|
||||
RuntimeUtils::AssignKernelGraphAddress(kernel_graph_);
|
||||
|
||||
kernel_graph_utils_->GetModelInputsInfo(kernel_graph_->graph_id(), &inputs_, &input_names_);
|
||||
kernel_graph_utils_->GetModelOutputsInfo(kernel_graph_->graph_id(), &outputs_, &output_names_);
|
||||
|
||||
std::vector<tensor::TensorPtr> graph_inputs, graph_outputs;
|
||||
kernel_graph_utils_->GetModelInputsInfo(kernel_graph_->graph_id(), &graph_inputs, &input_names_);
|
||||
kernel_graph_utils_->GetModelOutputsInfo(kernel_graph_->graph_id(), &graph_outputs, &output_names_);
|
||||
if (graph_inputs.size() != input_names_.size()) {
|
||||
MS_LOG(ERROR) << "Graph input size " << graph_inputs.size() << " != input names size " << input_names_.size();
|
||||
return kCoreFailed;
|
||||
}
|
||||
if (graph_outputs.size() != output_names_.size()) {
|
||||
MS_LOG(ERROR) << "Graph output size " << graph_outputs.size() << " != output names size " << output_names_.size();
|
||||
return kCoreFailed;
|
||||
}
|
||||
for (size_t i = 0; i < input_names_.size(); i++) {
|
||||
auto &input = graph_inputs[i];
|
||||
auto data_type = static_cast<enum DataType>(input->data_type());
|
||||
auto impl = std::make_shared<TensorDefaultImpl>(input_names_[i], data_type, input->shape_c());
|
||||
inputs_.push_back(impl);
|
||||
}
|
||||
for (size_t i = 0; i < output_names_.size(); i++) {
|
||||
auto &output = graph_outputs[i];
|
||||
auto data_type = static_cast<enum DataType>(output->data_type());
|
||||
auto impl = std::make_shared<TensorDefaultImpl>(output_names_[i], data_type, output->shape_c());
|
||||
outputs_.push_back(impl);
|
||||
}
|
||||
return kSuccess;
|
||||
}
|
||||
|
||||
Status SingleOpInferSession::RunGraph() { return kSuccess; }
|
||||
Status SingleOpInferSession::RunGraph(const std::vector<tensor::TensorPtr> &inputs,
|
||||
std::vector<tensor::TensorPtr> *outputs) {
|
||||
Status SingleOpInferSession::RunGraph(const std::vector<tensor::Tensor> &inputs, std::vector<tensor::Tensor> *outputs) {
|
||||
MS_LOG(INFO) << "SingleOpInferSession::RunGraph with input and outputs";
|
||||
MS_EXCEPTION_IF_NULL(kernel_graph_);
|
||||
|
||||
|
@ -179,8 +199,6 @@ Status SingleOpInferSession::RunGraph(const std::vector<tensor::TensorPtr> &inpu
|
|||
}
|
||||
|
||||
RuntimeUtils::CopyOutputTensorsFromKernelGraph(outputs, kernel_graph_);
|
||||
outputs_ = *outputs;
|
||||
|
||||
return kSuccess;
|
||||
}
|
||||
|
||||
|
@ -217,7 +235,7 @@ Status SingleOpInferSession::ResizeGraphInputs(const std::vector<tensor::TensorP
|
|||
graph_input_addr->SetSize(tensor_size);
|
||||
}
|
||||
// update input shape
|
||||
inputs_[i]->set_shape(dims[i]);
|
||||
inputs_[i]->SetShape(dims[i]);
|
||||
auto abstract = std::make_shared<abstract::AbstractTensor>(TypeIdToType(type_id), dims[i]);
|
||||
if (abstract == nullptr) {
|
||||
MS_LOG(ERROR) << "Abstract is nullptr.";
|
||||
|
@ -248,13 +266,12 @@ Status SingleOpInferSession::Resize(const std::vector<tensor::TensorPtr> &inputs
|
|||
}
|
||||
return kSuccess;
|
||||
}
|
||||
|
||||
std::vector<tensor::TensorPtr> SingleOpInferSession::GetOutputs() { return outputs_; }
|
||||
std::vector<tensor::TensorPtr> SingleOpInferSession::GetInputs() { return inputs_; }
|
||||
std::vector<MutableTensorImplPtr> SingleOpInferSession::GetOutputs() { return outputs_; }
|
||||
std::vector<MutableTensorImplPtr> SingleOpInferSession::GetInputs() { return inputs_; }
|
||||
std::vector<std::string> SingleOpInferSession::GetOutputNames() { return output_names_; }
|
||||
std::vector<std::string> SingleOpInferSession::GetInputNames() { return input_names_; }
|
||||
|
||||
tensor::TensorPtr SingleOpInferSession::GetOutputByTensorName(const std::string &tensor_name) {
|
||||
MutableTensorImplPtr SingleOpInferSession::GetOutputByTensorName(const std::string &tensor_name) {
|
||||
for (size_t idx = 0; idx < output_names_.size(); ++idx) {
|
||||
if (output_names_[idx] == tensor_name) {
|
||||
if (idx < outputs_.size()) {
|
||||
|
@ -266,7 +283,7 @@ tensor::TensorPtr SingleOpInferSession::GetOutputByTensorName(const std::string
|
|||
return nullptr;
|
||||
}
|
||||
|
||||
tensor::TensorPtr SingleOpInferSession::GetInputByTensorName(const std::string &tensor_name) {
|
||||
MutableTensorImplPtr SingleOpInferSession::GetInputByTensorName(const std::string &tensor_name) {
|
||||
for (size_t idx = 0; idx < input_names_.size(); ++idx) {
|
||||
if (input_names_[idx] == tensor_name) {
|
||||
if (idx < inputs_.size()) {
|
||||
|
|
|
@ -32,24 +32,24 @@ class SingleOpInferSession : public InferSession {
|
|||
Status AscendInit(const std::shared_ptr<Context> &context);
|
||||
Status CompileGraph(FuncGraphPtr graph, const void *data = nullptr, size_t size = 0) override;
|
||||
Status RunGraph() override;
|
||||
Status RunGraph(const std::vector<tensor::TensorPtr> &inputs, std::vector<tensor::TensorPtr> *outputs) override;
|
||||
Status RunGraph(const std::vector<tensor::Tensor> &inputs, std::vector<tensor::Tensor> *outputs) override;
|
||||
Status Resize(const std::vector<tensor::TensorPtr> &inputs, const std::vector<std::vector<int64_t>> &dims) override;
|
||||
|
||||
std::vector<tensor::TensorPtr> GetOutputs() override;
|
||||
std::vector<tensor::TensorPtr> GetInputs() override;
|
||||
std::vector<MutableTensorImplPtr> GetOutputs() override;
|
||||
std::vector<MutableTensorImplPtr> GetInputs() override;
|
||||
std::vector<std::string> GetOutputNames() override;
|
||||
std::vector<std::string> GetInputNames() override;
|
||||
tensor::TensorPtr GetOutputByTensorName(const std::string &tensorName) override;
|
||||
tensor::TensorPtr GetInputByTensorName(const std::string &name) override;
|
||||
MutableTensorImplPtr GetOutputByTensorName(const std::string &tensorName) override;
|
||||
MutableTensorImplPtr GetInputByTensorName(const std::string &name) override;
|
||||
|
||||
private:
|
||||
Status ResizeGraphInputs(const std::vector<tensor::TensorPtr> &inputs, const std::vector<std::vector<int64_t>> &dims);
|
||||
|
||||
KernelGraphUtilsPtr kernel_graph_utils_;
|
||||
KernelGraphPtr kernel_graph_;
|
||||
std::vector<tensor::TensorPtr> inputs_;
|
||||
std::vector<MutableTensorImplPtr> inputs_;
|
||||
std::vector<std::string> input_names_;
|
||||
std::vector<tensor::TensorPtr> outputs_;
|
||||
std::vector<MutableTensorImplPtr> outputs_;
|
||||
std::vector<std::string> output_names_;
|
||||
uint32_t device_id_ = 0;
|
||||
};
|
||||
|
|
|
@@ -914,9 +914,10 @@ void KernelGraphUtils::GetModelInputsInfo(uint32_t graph_id, std::vector<tensor:
      auto kernel_build_info = AnfAlgo::GetSelectKernelBuildInfo(parameter);
      auto data_type = kernel_build_info->GetOutputDeviceType(0);
      auto ms_tensor = std::make_shared<tensor::Tensor>(data_type, input_shape);
      inputs->push_back(ms_tensor);
      auto abstract = parameter->abstract();
      MS_EXCEPTION_IF_NULL(abstract);
      ms_tensor->set_name(abstract->name());
      inputs->push_back(ms_tensor);
      inputs_name->push_back(abstract->name());
    }
  }
@@ -972,6 +973,12 @@ void KernelGraphUtils::GetModelOutputsInfo(uint32_t graph_id, std::vector<tensor
  }
  *outputs = TransformVectorRefToMultiTensor(vector_outputs);
  GetOutputNames(anf_outputs, output_names);
  if (outputs->size() != output_names->size()) {
    MS_LOG_EXCEPTION << "Output tensor size " << outputs->size() << " != output name size " << output_names->size();
  }
  for (size_t i = 0; i < outputs->size(); i++) {
    outputs->at(i)->set_name(output_names->at(i));
  }
}

CNodePtr KernelGraphUtils::CreateNewCNode(const CNodePtr &cnode, KernelGraphPtr graph,

@@ -64,7 +64,7 @@ std::vector<AnfNodePtr> RuntimeUtils::GetGraphDataInputs(const KernelGraphPtr &k
  return data_inputs;
}

void RuntimeUtils::CopyInputTensorsToKernelGraph(const std::vector<tensor::TensorPtr> &inputs,
void RuntimeUtils::CopyInputTensorsToKernelGraph(const std::vector<tensor::Tensor> &inputs,
                                                 KernelGraphPtr kernel_graph) {
  MS_EXCEPTION_IF_NULL(kernel_graph);
  auto graph_inputs = GetGraphDataInputs(kernel_graph);
@@ -74,20 +74,20 @@ void RuntimeUtils::CopyInputTensorsToKernelGraph(const std::vector<tensor::Tenso
    return;
  }
  for (size_t i = 0; i < graph_inputs.size(); i++) {
    auto input = inputs[i];
    auto &input = inputs[i];
    auto graph_input = graph_inputs[i];
    auto graph_input_addr = AnfAlgo::GetMutableOutputAddr(graph_input, 0);
    if (graph_input_addr->ptr_ == nullptr) {
      MS_LOG(EXCEPTION) << "Output_idx" << i << " of input " << graph_input->DebugString()
                        << " output addr ptr is nullptr.";
    }
    memcpy(graph_input_addr->ptr_, input->data_c(), graph_input_addr->size_);
    memcpy(graph_input_addr->ptr_, input.data_c(), graph_input_addr->size_);
  }
}

void RuntimeUtils::CopyOutputTensorsFromKernelGraph(std::vector<tensor::TensorPtr> *outputs,
                                                    KernelGraphPtr kernel_graph) {
void RuntimeUtils::CopyOutputTensorsFromKernelGraph(std::vector<tensor::Tensor> *outputs, KernelGraphPtr kernel_graph) {
  MS_EXCEPTION_IF_NULL(kernel_graph);
  outputs->clear();
  auto graph_outputs = kernel_graph->outputs();
  for (auto graph_output : graph_outputs) {
    auto real_output_with_index = common::AnfAlgo::VisitKernelWithReturnType(graph_output, 0);
@@ -104,8 +104,7 @@ void RuntimeUtils::CopyOutputTensorsFromKernelGraph(std::vector<tensor::TensorPt
      auto s = static_cast<int64_t>(us);
      shape.push_back(s);
    }
    auto tensor_ptr = std::make_shared<mindspore::tensor::Tensor>(type_id, shape, data, data_size);
    outputs->push_back(tensor_ptr);
    outputs->emplace_back(mindspore::tensor::Tensor(type_id, shape, data, data_size));
  }
}

@@ -37,8 +37,8 @@ class RuntimeUtils {
  static kernel::AddressPtr GetAddressFromDevice(device::DeviceAddressPtr address_ptr);

  static std::vector<AnfNodePtr> GetGraphDataInputs(const KernelGraphPtr &kernel_graph);
  static void CopyInputTensorsToKernelGraph(const std::vector<tensor::TensorPtr> &inputs, KernelGraphPtr kernel_graph);
  static void CopyOutputTensorsFromKernelGraph(std::vector<tensor::TensorPtr> *outputs, KernelGraphPtr kernel_graph);
  static void CopyInputTensorsToKernelGraph(const std::vector<tensor::Tensor> &inputs, KernelGraphPtr kernel_graph);
  static void CopyOutputTensorsFromKernelGraph(std::vector<tensor::Tensor> *outputs, KernelGraphPtr kernel_graph);

  static void AssignKernelGraphAddress(KernelGraphPtr kernel_graph);
  static void AssignValueNodeAddress(KernelGraphPtr kernel_graph);

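A small illustrative sketch of the new by-value output path (hypothetical caller; `kernel_graph` is assumed to be an already-executed KernelGraphPtr): outputs now come back as plain tensor::Tensor values built in place, instead of shared TensorPtr objects.

std::vector<tensor::Tensor> graph_outputs;
RuntimeUtils::CopyOutputTensorsFromKernelGraph(&graph_outputs, kernel_graph);
for (auto &out : graph_outputs) {
  MS_LOG(INFO) << "graph output bytes: " << out.Size();
}
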
@@ -0,0 +1,141 @@
/**
 * This is the C++ adaptation and derivative work of Myia (https://github.com/mila-iqia/myia/).
 *
 * Copyright 2019-2021 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef MINDSPORE_LITE_SRC_EXTENDRT_UTILS_TENSOR_DEFAULT_IMPL_H_
#define MINDSPORE_LITE_SRC_EXTENDRT_UTILS_TENSOR_DEFAULT_IMPL_H_

#include <vector>
#include <string>
#include <memory>
#include <functional>

#include "include/api/types.h"
#include "ir/tensor.h"
#include "runtime/device/device_address.h"
#include "common/utils.h"
#include "common/mutable_tensor_impl.h"

namespace mindspore {
class TensorDefaultImpl : public MutableTensorImpl {
 public:
  TensorDefaultImpl() = default;
  TensorDefaultImpl(const std::string &name, enum DataType type, const std::vector<int64_t> &shape)
      : name_(name), type_(type), shape_(shape) {
    buffer_.SetData(nullptr, 0);
    data_ = buffer_.Data();
  }

  TensorDefaultImpl(const std::string &name, enum DataType type, const std::vector<int64_t> &shape, const void *data,
                    size_t data_len, bool ref_data, bool own_data)
      : name_(name), type_(type), shape_(shape) {
    if (ref_data) {
      data_ = data;
      own_data_ = own_data;
    } else {
      if (data == nullptr) {
        data_len = 0;
      }
      buffer_.SetData(data, data_len);
      data_ = buffer_.Data();
    }
  }
  ~TensorDefaultImpl() {
    if (own_data_ && data_ != nullptr && data_ != buffer_.Data()) {
      free(const_cast<void *>(data_));
    }
  }
  void SetShape(const std::vector<int64_t> &shape) override { shape_ = shape; }
  void SetDataType(mindspore::DataType data_type) override { type_ = data_type; }
  void SetName(const std::string &name) override { name_ = name; }

  mindspore::Format Format() const override { return format_; }
  void SetFormat(mindspore::Format format) override { format_ = format; }

  const std::string &Name() const override { return name_; }
  enum DataType DataType() const override { return type_; }
  const std::vector<int64_t> &Shape() const override { return shape_; }

  void SetAllocator(const std::shared_ptr<Allocator> &allocator) override { allocator_ = allocator; }
  std::shared_ptr<Allocator> GetAllocator() const override { return allocator_; }

  std::vector<QuantParam> GetQuantParams() const override { return quant_param_; }
  void SetQuantParams(const std::vector<QuantParam> &quant_param) override { quant_param_ = quant_param; }

  int64_t ElementNum() const { return std::accumulate(shape_.begin(), shape_.end(), 1, std::multiplies<int64_t>()); }
  size_t DataSize() const override { return ElementNum() * lite::DataTypeSize(static_cast<enum TypeId>(type_)); }

  void SetDeviceData(void *data) override { device_data_ = data; }
  void *GetDeviceData() override { return device_data_; }
  bool IsConst() const override { return false; }

  bool IsDevice() const override { return device_data_ != nullptr; }

  std::shared_ptr<const void> Data() const override {
    ResizeData();
    return std::shared_ptr<const void>(data_, [](const void *) {});
  }

  void SetData(void *data, bool own_data) override {
    data_ = data;
    own_data_ = own_data;
  }

  void *MutableData() override {
    ResizeData();
    return const_cast<void *>(data_);
  }

  std::shared_ptr<Impl> Clone() const override {
    auto impl = std::make_shared<TensorDefaultImpl>(name_, type_, shape_, data_, DataSize(), false, false);
    if (!impl) {
      return nullptr;
    }
    impl->SetFormat(format_);
    impl->SetQuantParams(quant_param_);
    impl->SetDeviceData(device_data_);
    impl->SetAllocator(allocator_);
    return impl;
  }

 protected:
  std::string name_;
  enum DataType type_ = DataType::kTypeUnknown;
  enum Format format_ = mindspore::NCHW;
  std::vector<int64_t> shape_;
  std::shared_ptr<Allocator> allocator_ = nullptr;
  std::vector<QuantParam> quant_param_;
  void *device_data_ = nullptr;

  mutable Buffer buffer_;
  mutable const void *data_ = nullptr;
  bool own_data_ = false;

  void ResizeData() const {
    if (data_ != nullptr && data_ != buffer_.Data()) {
      return;
    }
    auto data_size = DataSize();
    if (data_size != buffer_.DataSize()) {
      buffer_.ResizeData(data_size);
    }
    data_ = buffer_.Data();
  }
};
}  // namespace mindspore

#endif  // MINDSPORE_LITE_SRC_EXTENDRT_UTILS_TENSOR_DEFAULT_IMPL_H_
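
A brief construction sketch for the ref-data path of TensorDefaultImpl (illustrative values, not from this commit): with ref_data set to true the impl only borrows the caller's buffer, and own_data decides whether the destructor frees that pointer.

// user_buf stays owned by the caller because own_data is false.
float user_buf[3] = {1.0f, 2.0f, 3.0f};
auto impl = std::make_shared<mindspore::TensorDefaultImpl>(
    "in0", mindspore::DataType::kNumberTypeFloat32, std::vector<int64_t>{1, 3},
    user_buf, sizeof(user_buf), /*ref_data=*/true, /*own_data=*/false);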

@@ -18,10 +18,57 @@

#include <memory>
#include <algorithm>
#include <utility>

#include "extendrt/utils/tensor_utils.h"
#include "mindspore/ccsrc/kernel/common_utils.h"

namespace mindspore {
TensorRefData::TensorRefData(void *data, size_t bytes_size, size_t data_size, size_t ndim)
    : data_(data), elem_count_(bytes_size), data_size_(data_size), ndim_(ndim) {}

ssize_t TensorRefData::size() const { return static_cast<ssize_t>(elem_count_); }

ssize_t TensorRefData::itemsize() const {
  if (elem_count_ == 0) {
    return 0;
  }
  return static_cast<ssize_t>(data_size_ / elem_count_);
}

ssize_t TensorRefData::nbytes() const { return static_cast<ssize_t>(data_size_); }

ssize_t TensorRefData::ndim() const { return static_cast<ssize_t>(ndim_); }

void *TensorRefData::data() { return data_; }

const void *TensorRefData::const_data() const { return data_; }

std::string TensorRefData::ToString(TypeId type, const ShapeVector &shape, bool use_comma) const {
  std::stringstream stream;
  stream << "RefTensor:[";
  for (size_t i = 0; i < shape.size(); i++) {
    stream << shape[i];
    if (i + 1 < shape.size()) {
      stream << ",";
    }
  }
  stream << "]" << type;
  return stream.str();
}

mindspore::Format TensorTensorImpl::Format() const {
  MS_EXCEPTION_IF_NULL(tensor_);
  return kernel::GetFormatFromStrToEnum(tensor_->device_info().format_);
}

void TensorTensorImpl::SetFormat(mindspore::Format format) {
  MS_EXCEPTION_IF_NULL(tensor_);
  auto device_info = tensor_->device_info();
  device_info.format_ = kernel::GetFormatFromEnumToStr(format);
  tensor_->set_device_info(device_info);
}

std::vector<mindspore::tensor::TensorPtr> TensorUtils::MSTensorToTensorPtr(const std::vector<MSTensor> &ms_tensors) {
  std::vector<mindspore::tensor::TensorPtr> tensor_ptrs;

@@ -31,7 +78,8 @@ std::vector<mindspore::tensor::TensorPtr> TensorUtils::MSTensorToTensorPtr(const
    auto shape = ms_tensor.Shape();
    auto data = ms_tensor.MutableData();
    auto data_size = ms_tensor.DataSize();
    auto tensor_ptr = std::make_shared<mindspore::tensor::Tensor>(type_id, shape, data, data_size);
    auto ref_tensor_data = std::make_shared<TensorRefData>(data, ms_tensor.ElementNum(), data_size, shape.size());
    auto tensor_ptr = std::make_shared<mindspore::tensor::Tensor>(type_id, shape, ref_tensor_data);
    tensor_ptrs.push_back(tensor_ptr);
  }
  return tensor_ptrs;
@@ -40,22 +88,46 @@ std::vector<mindspore::tensor::TensorPtr> TensorUtils::MSTensorToTensorPtr(const
std::vector<MSTensor> TensorUtils::TensorPtrToMSTensor(std::vector<mindspore::tensor::TensorPtr> tensor_ptrs,
                                                       const std::vector<std::string> &tensor_names) {
  std::vector<MSTensor> ms_tensors;

  for (size_t i = 0; i < tensor_ptrs.size(); i++) {
    auto graph_tensor = tensor_ptrs[i];
    std::string graph_tensor_name = tensor_names[i];
    auto type_id = graph_tensor->data_type_c();
    auto data_type = static_cast<mindspore::DataType>(type_id);
    auto ms_tensor_ptr = MSTensor::CreateRefTensor(graph_tensor_name, data_type, graph_tensor->shape_c(),
                                                   graph_tensor->data_c(), graph_tensor->Size());
    if (ms_tensor_ptr == nullptr) {
      MS_LOG_WARNING << "Failed to create input tensor ";
      return {};
    }
    ms_tensors.push_back(*ms_tensor_ptr);
    delete ms_tensor_ptr;
    graph_tensor->set_name(graph_tensor_name);
    auto tensor_impl = std::make_shared<TensorTensorImpl>(graph_tensor);
    ms_tensors.push_back(MSTensor(tensor_impl));
  }
  return ms_tensors;
}

std::vector<mindspore::tensor::Tensor> TensorUtils::MSTensorToTensor(const std::vector<MSTensor> &ms_tensors) {
  std::vector<mindspore::tensor::Tensor> tensors;
  for (auto ms_tensor : ms_tensors) {
    auto data_type = ms_tensor.DataType();
    auto type_id = static_cast<mindspore::TypeId>(data_type);
    auto shape = ms_tensor.Shape();
    auto data = ms_tensor.MutableData();
    auto data_size = ms_tensor.DataSize();
    auto ref_tensor_data = std::make_shared<TensorRefData>(data, ms_tensor.ElementNum(), data_size, shape.size());
    mindspore::tensor::Tensor tensor(type_id, shape, ref_tensor_data);
    auto device_address = ms_tensor.GetDeviceData();
    if (device_address != nullptr) {
      auto lite_device_address = std::make_shared<LiteDeviceAddress>(device_address, ms_tensor.DataSize());
      tensor.set_device_address(lite_device_address);
    }
    tensors.emplace_back(std::move(tensor));
  }
  return tensors;
}

std::vector<MSTensor> TensorUtils::TensorToMSTensor(std::vector<mindspore::tensor::Tensor> tensors,
                                                    const std::vector<std::string> &tensor_names) {
  std::vector<MSTensor> ms_tensors;
  for (size_t i = 0; i < tensors.size(); i++) {
    auto &graph_tensor = tensors[i];
    std::string graph_tensor_name = tensor_names[i];
    graph_tensor.set_name(graph_tensor_name);
    auto tensor_impl = std::make_shared<TensorTensorImpl>(graph_tensor);
    ms_tensors.emplace_back(MSTensor(tensor_impl));
  }
  return ms_tensors;
}
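
A short round-trip sketch of the two new conversions (names such as ms_inputs and names are assumptions, not from the diff): MSTensorToTensor wraps each MSTensor buffer in TensorRefData, so no host copy happens, and TensorToMSTensor wraps tensor::Tensor back into the public MSTensor API via TensorTensorImpl.

// ms_inputs (std::vector<MSTensor>) and names (std::vector<std::string>) are assumed to exist.
auto inner_tensors = mindspore::TensorUtils::MSTensorToTensor(ms_inputs);           // borrows data
auto api_tensors = mindspore::TensorUtils::TensorToMSTensor(inner_tensors, names);  // wraps back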

@@ -21,11 +21,160 @@

#include <vector>
#include <string>
#include <memory>
#include <functional>

#include "include/api/types.h"
#include "ir/tensor.h"
#include "runtime/device/device_address.h"
#include "common/utils.h"
#include "common/mutable_tensor_impl.h"
#include "mindspore/core/ir/tensor.h"

namespace mindspore {
class TensorRefData : public tensor::TensorData {
 public:
  TensorRefData(void *data, size_t elem_count, size_t data_size, size_t ndim);
  ~TensorRefData() = default;

  ssize_t size() const override;
  ssize_t itemsize() const override;
  ssize_t nbytes() const override;
  ssize_t ndim() const override;
  void *data() override;
  const void *const_data() const override;
  bool is_sub_data() const override { return false; }
  bool has_sub_data() const override { return false; }
  std::string ToString(TypeId type, const ShapeVector &shape, bool use_comma) const override;

 private:
  void *data_ = nullptr;
  size_t elem_count_ = 0;
  size_t data_size_ = 0;
  size_t ndim_ = 0;
};

constexpr auto kLiteDeviceName = "LiteDevice";

class LiteDeviceAddress : public device::DeviceAddress {
 public:
  LiteDeviceAddress(void *ptr, size_t size) : device::DeviceAddress(ptr, size) { device_name_ = kLiteDeviceName; }
  void SetData(void *data) { set_ptr(data); }

  bool SyncDeviceToHost(const ShapeVector &shape, size_t size, TypeId type, void *host_ptr) const override {
    return false;
  }
  bool SyncHostToDevice(const ShapeVector &shape, size_t size, TypeId type, const void *host_ptr,
                        const std::string &format) const override {
    return false;
  }
  bool SyncHostToDevice(const ShapeVector &shape, size_t size, TypeId type, const void *host_ptr) const override {
    return SyncHostToDevice(shape, size, type, host_ptr, "DefaultFormat");
  }
  void ClearDeviceMemory() override {}
};

class TensorTensorImpl : public MutableTensorImpl {
 public:
  explicit TensorTensorImpl(const tensor::Tensor &tensor) : tensor_(std::make_shared<tensor::Tensor>(tensor)) {}
  explicit TensorTensorImpl(const std::shared_ptr<tensor::Tensor> &tensor) : tensor_(tensor) {}

  void SetData(void *, bool) override { MS_LOG_EXCEPTION << "Cannot set data for TensorTensorImpl"; }

  std::shared_ptr<const void> Data() const override {
    MS_EXCEPTION_IF_NULL(tensor_);
    return std::shared_ptr<const void>(tensor_->data_c(), [](const void *) {});
  }

  void *MutableData() override {
    MS_EXCEPTION_IF_NULL(tensor_);
    return tensor_->data_c();
  }

  void SetDeviceData(void *data) override {
    MS_EXCEPTION_IF_NULL(tensor_);
    auto data_size = DataSize();
    auto device_address = std::make_shared<LiteDeviceAddress>(data, data_size);
    tensor_->set_device_address(device_address);
  }
  void *GetDeviceData() override {
    MS_EXCEPTION_IF_NULL(tensor_);
    auto device_address = tensor_->device_address();
    if (device_address == nullptr) {
      return nullptr;
    }
    return device_address->GetMutablePtr();
  }

  bool IsDevice() const override {
    MS_EXCEPTION_IF_NULL(tensor_);
    return tensor_->device_address() != nullptr;
  }

  bool IsConst() const override { return false; }

  void SetShape(const std::vector<int64_t> &shape) override {
    MS_EXCEPTION_IF_NULL(tensor_);
    tensor_->set_shape(shape);
  }
  void SetDataType(mindspore::DataType data_type) override {
    MS_EXCEPTION_IF_NULL(tensor_);
    tensor_->set_data_type(static_cast<enum TypeId>(data_type));
  }
  void SetName(const std::string &name) override {
    MS_EXCEPTION_IF_NULL(tensor_);
    tensor_->set_name(name);
  }

  mindspore::Format Format() const override;

  void SetFormat(mindspore::Format format) override;

  const std::string &Name() const override {
    MS_EXCEPTION_IF_NULL(tensor_);
    return tensor_->name();
  }
  enum DataType DataType() const override {
    MS_EXCEPTION_IF_NULL(tensor_);
    return static_cast<enum DataType>(tensor_->data_type());
  }
  const std::vector<int64_t> &Shape() const override {
    MS_EXCEPTION_IF_NULL(tensor_);
    return tensor_->shape();
  }

  void SetAllocator(const std::shared_ptr<Allocator> &allocator) override {
    MS_EXCEPTION_IF_NULL(tensor_);
    tensor_->set_user_data("allocator", allocator);
  }
  std::shared_ptr<Allocator> GetAllocator() const override {
    MS_EXCEPTION_IF_NULL(tensor_);
    return tensor_->user_data<Allocator>("allocator");
  }

  std::vector<QuantParam> GetQuantParams() const override {
    MS_EXCEPTION_IF_NULL(tensor_);
    auto data = tensor_->user_data<std::vector<QuantParam>>("quant_param");
    return data ? *data : std::vector<QuantParam>();
  }

  void SetQuantParams(const std::vector<QuantParam> &quant_param) override {
    MS_EXCEPTION_IF_NULL(tensor_);
    tensor_->set_user_data("quant_param", std::make_shared<std::vector<QuantParam>>(quant_param));
  }

  int64_t ElementNum() const {
    auto &shape = Shape();
    return std::accumulate(shape.begin(), shape.end(), 1, std::multiplies<int64_t>());
  }
  size_t DataSize() const override { return ElementNum() * lite::DataTypeSize(static_cast<enum TypeId>(DataType())); }

  std::shared_ptr<Impl> Clone() const override { return std::make_shared<TensorTensorImpl>(tensor_); }

 private:
  std::shared_ptr<tensor::Tensor> tensor_ = nullptr;
};

class TensorUtils {
 public:
  // MSTensor <-> TensorPtr
@@ -33,6 +182,10 @@ class TensorUtils {
  static std::vector<MSTensor> TensorPtrToMSTensor(std::vector<mindspore::tensor::TensorPtr> tensor_ptrs,
                                                   const std::vector<std::string> &tensor_names);

  static std::vector<mindspore::tensor::Tensor> MSTensorToTensor(const std::vector<MSTensor> &ms_tensors);
  static std::vector<MSTensor> TensorToMSTensor(std::vector<mindspore::tensor::Tensor> tensors,
                                                const std::vector<std::string> &tensor_names);

  // TensorPtr <-> Tensor
  static std::vector<mindspore::tensor::TensorPtr> TensorToTensorPtr(
    const std::vector<mindspore::tensor::Tensor> &tensors);

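To illustrate the intent of TensorRefData and LiteDeviceAddress, a hedged sketch (host_ptr, dev_ptr and the sizes are assumed to be managed by the caller, not taken from this commit): an existing host buffer can back a tensor::Tensor with no copy, and a raw device pointer can be attached as its device address.

ShapeVector shape = {1, 3, 224, 224};
size_t elem_count = 1 * 3 * 224 * 224;
size_t bytes = elem_count * sizeof(float);
auto ref = std::make_shared<mindspore::TensorRefData>(host_ptr, elem_count, bytes, shape.size());
mindspore::tensor::Tensor t(mindspore::kNumberTypeFloat32, shape, ref);                 // zero-copy host view
t.set_device_address(std::make_shared<mindspore::LiteDeviceAddress>(dev_ptr, bytes));   // optional device view
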
@ -146,7 +146,7 @@ MSFormat MSTensorGetFormat(const MSTensorHandle tensor) {
|
|||
return kMSFormatNHWC;
|
||||
}
|
||||
auto impl = static_cast<mindspore::LiteTensorImpl *>(tensor);
|
||||
return static_cast<MSFormat>(impl->format());
|
||||
return static_cast<MSFormat>(impl->Format());
|
||||
}
|
||||
|
||||
void MSTensorSetData(MSTensorHandle tensor, void *data) {
|
||||
|
@ -155,7 +155,7 @@ void MSTensorSetData(MSTensorHandle tensor, void *data) {
|
|||
return;
|
||||
}
|
||||
auto impl = static_cast<mindspore::LiteTensorImpl *>(tensor);
|
||||
return impl->SetData(data);
|
||||
return impl->SetData(data, true);
|
||||
}
|
||||
|
||||
const void *MSTensorGetData(const MSTensorHandle tensor) {
|
||||
|
|
|
@ -77,6 +77,12 @@ std::shared_ptr<LiteTensorImpl> LiteTensorImpl::CreateTensorImplByDeepCopy(const
  return impl;
}

void LiteTensorImpl::SetDeviceData(void *data) { MS_LOG(ERROR) << "Not implemented."; }
void *LiteTensorImpl::GetDeviceData() {
  MS_LOG(ERROR) << "Not implemented.";
  return nullptr;
}

#ifndef STRING_KERNEL_CLIP
std::shared_ptr<LiteTensorImpl> LiteTensorImpl::StringsToTensorImpl(const std::string &name,
                                                                    const std::vector<std::string> &str) {
@ -30,11 +30,12 @@
#include "src/tensor.h"
#include "src/common/log_adapter.h"
#include "ir/api_tensor_impl.h"
#include "common/mutable_tensor_impl.h"

namespace mindspore {
using mindspore::lite::RET_OK;

class LiteTensorImpl : public mindspore::MSTensor::Impl {
class LiteTensorImpl : public MutableTensorImpl {
 public:
  LiteTensorImpl() {}

@ -80,7 +81,7 @@ class LiteTensorImpl : public mindspore::MSTensor::Impl {
    return tensor_name_;
  }

  void SetName(const std::string &name) {
  void SetName(const std::string &name) override {
    if (lite_tensor_ == nullptr) {
      MS_LOG(ERROR) << "Invalid tensor.";
      return;
@ -97,7 +98,7 @@ class LiteTensorImpl : public mindspore::MSTensor::Impl {
    return static_cast<enum DataType>(lite_tensor_->data_type());
  }

  void SetDataType(enum DataType data_type) {
  void SetDataType(enum DataType data_type) override {
    if (lite_tensor_ == nullptr) {
      MS_LOG(ERROR) << "Invalid tensor.";
      return;
@ -127,7 +128,7 @@ class LiteTensorImpl : public mindspore::MSTensor::Impl {

  std::shared_ptr<mindspore::MSTensor::Impl> Clone() const override { return nullptr; }

  void SetShape(const std::vector<int64_t> &shape) {
  void SetShape(const std::vector<int64_t> &shape) override {
    if (lite_tensor_ == nullptr) {
      MS_LOG(ERROR) << "Invalid tensor.";
      return;
@ -138,7 +139,7 @@ class LiteTensorImpl : public mindspore::MSTensor::Impl {
    lite_tensor_->set_shape(tensor_shape);
  }

  std::shared_ptr<Allocator> allocator() const {
  std::shared_ptr<Allocator> GetAllocator() const override {
    if (lite_tensor_ == nullptr) {
      MS_LOG(ERROR) << "Invalid tensor.";
      return nullptr;
@ -146,7 +147,7 @@ class LiteTensorImpl : public mindspore::MSTensor::Impl {
    return lite_tensor_->allocator();
  }

  void SetAllocator(std::shared_ptr<Allocator> allocator) {
  void SetAllocator(const std::shared_ptr<Allocator> &allocator) override {
    if (lite_tensor_ == nullptr) {
      MS_LOG(ERROR) << "Invalid tensor.";
      return;
@ -154,7 +155,7 @@ class LiteTensorImpl : public mindspore::MSTensor::Impl {
    lite_tensor_->set_allocator(allocator);
  }

  mindspore::Format format() {
  mindspore::Format Format() const override {
    if (lite_tensor_ == nullptr) {
      MS_LOG(ERROR) << "Invalid tensor.";
      return mindspore::Format::NHWC;
@ -162,7 +163,7 @@ class LiteTensorImpl : public mindspore::MSTensor::Impl {
    return lite_tensor_->format();
  }

  void SetFormat(mindspore::Format format) {
  void SetFormat(mindspore::Format format) override {
    if (lite_tensor_ == nullptr) {
      MS_LOG(ERROR) << "Invalid tensor.";
      return;
@ -185,7 +186,7 @@ class LiteTensorImpl : public mindspore::MSTensor::Impl {
    }
    return lite_tensor_->MutableData();
  }
  virtual bool IsConst() const {
  bool IsConst() const override {
    if (lite_tensor_ == nullptr) {
      MS_LOG(ERROR) << "Invalid tensor.";
      return false;
@ -201,15 +202,15 @@ class LiteTensorImpl : public mindspore::MSTensor::Impl {
    return lite_tensor_->Size();
  }

  void SetData(void *data) {
  void SetData(void *data, bool own_data) override {
    if (lite_tensor_ == nullptr) {
      MS_LOG(ERROR) << "Invalid tensor.";
      return;
    }
    lite_tensor_->set_data(data);
    lite_tensor_->set_data(data, own_data);
  }

  virtual std::vector<QuantParam> QuantParams() const {
  std::vector<QuantParam> GetQuantParams() const override {
    if (lite_tensor_ == nullptr) {
      MS_LOG(ERROR) << "Invalid tensor.";
      return std::vector<QuantParam>{};
@ -228,7 +229,7 @@ class LiteTensorImpl : public mindspore::MSTensor::Impl {
    return quant_params;
  }

  void SetQuantParams(std::vector<QuantParam> quant_params) {
  void SetQuantParams(const std::vector<QuantParam> &quant_params) override {
    if (lite_tensor_ == nullptr) {
      MS_LOG(ERROR) << "Invalid tensor.";
      return;
@ -261,6 +262,9 @@ class LiteTensorImpl : public mindspore::MSTensor::Impl {

  void set_from_session(bool from_session) { from_session_ = from_session; }

  void SetDeviceData(void *data) override;
  void *GetDeviceData() override;

 private:
  lite::Tensor *lite_tensor_ = nullptr;
  std::string tensor_name_ = "";
@ -100,6 +100,8 @@ bool MSTensor::operator==(const MSTensor &tensor) const {
  return lite_impl->lite_tensor() == lite_tensor_impl->lite_tensor();
}

bool MSTensor::operator!=(const MSTensor &tensor) const { return !operator==(tensor); }

MSTensor *MSTensor::CreateTensor(const std::vector<char> &name, enum DataType type, const std::vector<int64_t> &shape,
                                 const void *data, size_t data_len) noexcept {
  if (data_len > MAX_MALLOC_SIZE) {
@ -146,12 +148,14 @@ MSTensor *MSTensor::CreateTensor(const std::vector<char> &name, enum DataType ty
}

MSTensor *MSTensor::CreateRefTensor(const std::vector<char> &name, enum DataType type,
                                    const std::vector<int64_t> &shape, const void *data, size_t data_len) noexcept {
                                    const std::vector<int64_t> &shape, const void *data, size_t data_len,
                                    bool own_data) noexcept {
  auto impl = LiteTensorImpl::CreateTensorImpl(CharToString(name), type, shape, data, data_len);
  if (impl == nullptr) {
    MS_LOG(ERROR) << "Allocate tensor impl failed.";
    return nullptr;
  }
  impl->set_own_data(own_data);
  auto ms_tensor = new (std::nothrow) MSTensor(impl);
  if (ms_tensor == nullptr) {
    MS_LOG(ERROR) << "Allocate tensor impl failed.";
@ -160,10 +164,10 @@ MSTensor *MSTensor::CreateRefTensor(const std::vector<char> &name, enum DataType
  return ms_tensor;
}

MSTensor *MSTensor::CreateDevTensor(const std::vector<char> &name, enum DataType type,
                                    const std::vector<int64_t> &shape, const void *data, size_t data_len) noexcept {
MSTensor MSTensor::CreateDeviceTensor(const std::vector<char> &name, enum DataType type,
                                      const std::vector<int64_t> &shape, void *data, size_t data_len) noexcept {
  MS_LOG(ERROR) << "Unsupported Feature.";
  return nullptr;
  return MSTensor(nullptr);
}

MSTensor *MSTensor::CreateTensorFromFile(const std::vector<char> &file, enum DataType type,
@ -305,12 +309,28 @@ void *MSTensor::MutableData() {
  return impl_->MutableData();
}

void MSTensor::SetDeviceData(void *data) {
  if (impl_ == nullptr) {
    MS_LOG(ERROR) << "Invalid tensor implement.";
    return;
  }
  std::static_pointer_cast<MutableTensorImpl>(impl_)->SetDeviceData(data);
}

void *MSTensor::GetDeviceData() {
  if (impl_ == nullptr) {
    MS_LOG(ERROR) << "Invalid tensor implement.";
    return nullptr;
  }
  return std::static_pointer_cast<MutableTensorImpl>(impl_)->GetDeviceData();
}

bool MSTensor::IsConst() const {
  if (impl_ == nullptr) {
    MS_LOG(ERROR) << "Invalid tensor implement.";
    return false;
  }
  return std::static_pointer_cast<LiteTensorImpl>(impl_)->IsConst();
  return std::static_pointer_cast<MutableTensorImpl>(impl_)->IsConst();
}

size_t MSTensor::DataSize() const {
@ -338,7 +358,7 @@ void MSTensor::SetShape(const std::vector<int64_t> &shape) {
    return;
  }
  std::static_pointer_cast<LiteTensorImpl>(impl_)->SetShape(shape);
  std::static_pointer_cast<MutableTensorImpl>(impl_)->SetShape(shape);
}

void MSTensor::SetDataType(enum DataType data_type) {
@ -347,7 +367,7 @@ void MSTensor::SetDataType(enum DataType data_type) {
    return;
  }
  std::static_pointer_cast<LiteTensorImpl>(impl_)->SetDataType(data_type);
  std::static_pointer_cast<MutableTensorImpl>(impl_)->SetDataType(data_type);
}

void MSTensor::SetTensorName(const std::vector<char> &name) {
@ -355,7 +375,7 @@ void MSTensor::SetTensorName(const std::vector<char> &name) {
    MS_LOG(ERROR) << "Invalid tensor implement.";
    return;
  }
  std::static_pointer_cast<LiteTensorImpl>(impl_)->SetName(CharToString(name));
  std::static_pointer_cast<MutableTensorImpl>(impl_)->SetName(CharToString(name));
}

void MSTensor::SetAllocator(std::shared_ptr<Allocator> allocator) {
@ -364,7 +384,7 @@ void MSTensor::SetAllocator(std::shared_ptr<Allocator> allocator) {
    return;
  }
  return std::static_pointer_cast<LiteTensorImpl>(impl_)->SetAllocator(allocator);
  return std::static_pointer_cast<MutableTensorImpl>(impl_)->SetAllocator(allocator);
}

std::shared_ptr<Allocator> MSTensor::allocator() const {
@ -373,7 +393,7 @@ std::shared_ptr<Allocator> MSTensor::allocator() const {
    return nullptr;
  }
  return std::static_pointer_cast<LiteTensorImpl>(impl_)->allocator();
  return std::static_pointer_cast<MutableTensorImpl>(impl_)->GetAllocator();
}

void MSTensor::SetFormat(mindspore::Format format) {
@ -382,7 +402,7 @@ void MSTensor::SetFormat(mindspore::Format format) {
    return;
  }
  return std::static_pointer_cast<LiteTensorImpl>(impl_)->SetFormat(format);
  return std::static_pointer_cast<MutableTensorImpl>(impl_)->SetFormat(format);
}

mindspore::Format MSTensor::format() const {
@ -391,16 +411,16 @@ mindspore::Format MSTensor::format() const {
    return mindspore::Format::NHWC;
  }
  return std::static_pointer_cast<LiteTensorImpl>(impl_)->format();
  return std::static_pointer_cast<MutableTensorImpl>(impl_)->Format();
}

void MSTensor::SetData(void *data) {
void MSTensor::SetData(void *data, bool own_data) {
  if (impl_ == nullptr) {
    MS_LOG(ERROR) << "Invalid tensor implement.";
    return;
  }
  return std::static_pointer_cast<LiteTensorImpl>(impl_)->SetData(data);
  return std::static_pointer_cast<MutableTensorImpl>(impl_)->SetData(data, own_data);
}

std::vector<QuantParam> MSTensor::QuantParams() const {
@ -409,7 +429,7 @@ std::vector<QuantParam> MSTensor::QuantParams() const {
    return std::vector<QuantParam>{};
  }
  return std::static_pointer_cast<LiteTensorImpl>(impl_)->QuantParams();
  return std::static_pointer_cast<MutableTensorImpl>(impl_)->GetQuantParams();
}

void MSTensor::SetQuantParams(std::vector<QuantParam> quant_params) {
@ -418,7 +438,7 @@ void MSTensor::SetQuantParams(std::vector<QuantParam> quant_params) {
    return;
  }
  return std::static_pointer_cast<LiteTensorImpl>(impl_)->SetQuantParams(quant_params);
  return std::static_pointer_cast<MutableTensorImpl>(impl_)->SetQuantParams(quant_params);
}

Buffer::Buffer() : impl_(std::make_shared<Impl>()) {}
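
Taken together, the MSTensor changes above let a caller wrap existing host memory without handing over ownership and attach device memory directly. A hedged usage sketch; the buffer pointers, length, shape, and data type are illustrative, and SetDeviceData only takes effect on backends that implement it:

#include <vector>
#include "include/api/types.h"

void WrapBuffers(void *host_buf, size_t host_len, void *device_buf) {
  // own_data = false: the tensor references host_buf but never frees it.
  mindspore::MSTensor *input = mindspore::MSTensor::CreateRefTensor(
    "input0", mindspore::DataType::kNumberTypeFloat32, {1, 224, 224, 3}, host_buf, host_len, false);
  if (input == nullptr) {
    return;
  }
  // Attach device memory the model can read directly; the caller keeps managing device_buf.
  input->SetDeviceData(device_buf);
  void *dev = input->GetDeviceData();  // returns the attached pointer where supported
  (void)dev;
  mindspore::MSTensor::DestroyTensorPtr(input);
}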
@ -128,16 +128,13 @@ class Tensor {

  // note: if the old data is valid, set_data only releases ownership of it and does not free it. Call FreeData
  // before set_data if the current tensor should free the old data.
  void set_data(void *data) {
    if (this->data_ == data) {
      return;
    }
    if (allocator_ != nullptr) {
  void set_data(void *data, bool own_data = true) {
    if (allocator_ != nullptr && this->data_ != data) {
      allocator_->IncRefCount(data, 1);
      allocator_->DecRefCount(this->data_, 1);
    }
    this->data_ = data;
    this->own_data_ = true;
    this->own_data_ = own_data;
  }

  Category category() const { return this->category_; }
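
For the internal lite::Tensor, the new own_data flag makes the same ownership distinction explicit. A minimal sketch, assuming a valid tensor pointer and the internal src/tensor.h header from the hunk above:

#include "src/tensor.h"  // internal header; path taken from the hunk above

void AttachExternalBuffer(mindspore::lite::Tensor *tensor, void *external_buf) {
  // own_data = false: the tensor reads and writes external_buf but will not free it.
  tensor->set_data(external_buf, false);
  // ... run the graph ...
  // Detach before the caller releases external_buf so the tensor is not left
  // holding a dangling pointer.
  tensor->set_data(nullptr);
}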
@ -910,12 +910,12 @@ int BenchmarkUnifiedApi::PrintInputData() {
#ifdef PARALLEL_INFERENCE
void BenchmarkUnifiedApi::ModelParallelRunnerWarmUp(int index) {
  auto in = model_runner_.GetInputs();
  auto output = all_outputs_[index];
  for (size_t i = 0; i < in.size(); i++) {
    in[i].SetData(all_inputs_data_[index][i]);
    in[i].SetShape(resize_dims_[i]);
  }
  auto warm_up_start = GetTimeUs();
  std::vector<MSTensor> output;
  auto ret = model_runner_.Predict(in, &output);
  for (size_t j = 0; j < in.size(); j++) {
    in[j].SetData(nullptr);
@ -937,12 +937,12 @@ void BenchmarkUnifiedApi::ModelParallelRunnerRun(int task_num, int parallel_idx)
  int idx = parallel_idx + flags_->warm_up_loop_count_;
  auto in = model_runner_.GetInputs();
  auto in_data = all_inputs_data_[idx];
  auto output = all_outputs_[idx];
  for (size_t tensor_index = 0; tensor_index < in.size(); tensor_index++) {
    in.at(tensor_index).SetData(all_inputs_data_.at(idx)[tensor_index]);
    in.at(tensor_index).SetShape(resize_dims_.at(tensor_index));
  }
  auto predict_start = GetTimeUs();
  std::vector<MSTensor> output;
  auto ret = model_runner_.Predict(in, &output);
  if (ret != kSuccess) {
    model_parallel_runner_ret_failed_ = true;
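
The benchmark changes above follow a zero-copy input pattern: bind caller-owned buffers to the runner inputs, run Predict, then detach the buffers so the tensors never free memory they do not own. A hedged sketch with illustrative names (runner, buffers, dims) and an assumed header path:

#include <vector>
#include "include/api/model_parallel_runner.h"  // assumed header for ModelParallelRunner

void RunOnce(mindspore::ModelParallelRunner &runner, const std::vector<void *> &buffers,
             const std::vector<std::vector<int64_t>> &dims) {
  auto inputs = runner.GetInputs();
  for (size_t i = 0; i < inputs.size(); i++) {
    inputs[i].SetData(buffers[i], false);  // own_data = false keeps ownership with the caller
    inputs[i].SetShape(dims[i]);
  }
  std::vector<mindspore::MSTensor> outputs;
  (void)runner.Predict(inputs, &outputs);
  for (size_t i = 0; i < inputs.size(); i++) {
    inputs[i].SetData(nullptr);  // detach before the caller's buffers go away
  }
}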
@ -13,6 +13,7 @@ set(REG_SRC ${CONVERT_REG_SRC}
        ${KERNEL_REG_DIR}/../common/string_util.cc
        ${KERNEL_REG_DIR}/../common/utils.cc
        ${KERNEL_REG_DIR}/../extendrt/delegate/tensorrt/distribution/distribution_base.cc
        ${KERNEL_REG_DIR}/../extendrt/delegate/plugin/tensorrt_executor_plugin.cc
        ${CORE_DIR}/utils/log_adapter.cc
        ${CORE_DIR}/utils/status.cc
        ${CONVERTER_DIR}/converter_context.cc
@ -137,9 +137,9 @@ TEST_F(TestZeroCopy, TestDeviceTensor) {
    // Apply transform on images
    Status rc = Transform(image, &image);
    ASSERT_TRUE(rc == kSuccess);
    MSTensor *device_tensor =
      MSTensor::CreateDevTensor(image.Name(), image.DataType(), image.Shape(),
                                image.MutableData(), image.DataSize());
    MSTensor device_tensor =
      MSTensor::CreateDeviceTensor(image.Name(), image.DataType(), image.Shape(),
                                   image.MutableData(), image.DataSize());
    MSTensor *tensor =
      MSTensor::CreateTensor(image.Name(), image.DataType(), image.Shape(),
                             image.Data().get(), image.DataSize());
@ -158,7 +158,7 @@ TEST_F(TestZeroCopy, TestDeviceTensor) {
    inputs.clear();
    start_time = (TimeValue){0};
    end_time = (TimeValue){0};
    inputs.push_back(*device_tensor);
    inputs.push_back(device_tensor);

    // infer with device tensor
    (void)gettimeofday(&start_time, nullptr);