forked from mindspore-Ecosystem/mindspore

support custom subgraph

commit e22d28c52d
parent 5d156c7c09
```diff
@@ -51,6 +51,7 @@ struct DeviceContext {
   DeviceInfo device_info_;
   std::string provider_{};
   std::string provider_device_{};
+  AllocatorPtr allocator_ = nullptr;
 };

 /// \brief Context defined for holding environment variables during runtime.
```
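The new `allocator_` field lets a plugged-in device publish its own memory allocator; `CustomSubGraph::Prepare()` (later in this commit) looks it up by matching `provider_`. A minimal configuration sketch, assuming `AllocatorPtr` is the `std::shared_ptr<mindspore::Allocator>` alias from allocator.h; the provider names and `VendorAllocator` type are illustrative, not part of this diff:

```cpp
#include <memory>

// VendorAllocator stands in for a provider-supplied mindspore::Allocator subclass.
AllocatorPtr vendor_allocator = std::make_shared<VendorAllocator>();

mindspore::lite::DeviceContext device_ctx;
device_ctx.provider_ = "VendorX";           // must match the provider of the custom kernels
device_ctx.provider_device_ = "VendorNPU";  // provider-specific device name
device_ctx.allocator_ = vendor_allocator;   // picked up by CustomSubGraph::Prepare()
```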
```diff
@@ -42,12 +42,12 @@ class MS_API MSTensor {
   /// \brief Set memory allocator for current MSTensor.
   ///
   /// \param[in] allocator Define memory allocator, which is shown in allocator.h.
-  virtual void set_allocator(mindspore::Allocator *allocator) = 0;
+  virtual void set_allocator(AllocatorPtr allocator) = 0;

   /// \brief Get memory allocator of current MSTensor.
   ///
   /// \return Pointer of memory allocator class.
-  virtual mindspore::Allocator *allocator() const = 0;
+  virtual AllocatorPtr allocator() const = 0;

   /// \brief Get data type of the MindSpore Lite MSTensor.
   ///
```
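Across this commit the allocator accessors move from raw `mindspore::Allocator *` to `AllocatorPtr`, assumed here to be the `std::shared_ptr<mindspore::Allocator>` alias from allocator.h. A small sketch of what that buys callers:

```cpp
// Attaching an allocator now shares ownership instead of borrowing a raw
// pointer that could dangle once its owner is destroyed.
void AttachAllocator(mindspore::tensor::MSTensor *tensor, AllocatorPtr alloc) {
  tensor->set_allocator(alloc);                // tensor holds a reference
  AllocatorPtr current = tensor->allocator();  // ref-counted handle, safe to keep
  (void)current;
}
```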
```diff
@@ -28,9 +28,6 @@

 namespace mindspore {
 namespace kernel {
-extern const char *const kArchCPU;
-extern const char *const kArchGPU;
-
 /// \brief KernelDesc defined kernel's basic attribute.
 struct MS_API KernelDesc {
   TypeId data_type; /**< kernel data type argument */
```
```diff
@@ -60,8 +60,8 @@ class MTensor : public mindspore::tensor::MSTensor {
   MTensor(String name, TypeId type, Vector<int32_t> shape) : tensor_name_(name), data_type_(type), shape_(shape) {}
   ~MTensor() override;

-  void set_allocator(mindspore::Allocator *allocator) override {}
-  mindspore::Allocator *allocator() const override { return nullptr; }
+  void set_allocator(AllocatorPtr allocator) override {}
+  AllocatorPtr allocator() const override { return nullptr; }
   TypeId data_type() const override { return data_type_; }
   void set_data_type(TypeId data_type) override { data_type_ = data_type; }
   Vector<int> shape() const override { return shape_; }
```
```diff
@@ -39,15 +39,10 @@ class MS_API MSTensor {
   static MSTensor *CreateTensor(const String &name, TypeId type, const Vector<int> &shape, const void *data,
                                 size_t data_len);

-  /// \brief Set memory allocator for current MSTensor.
-  ///
-  /// \param[in] allocator Define memory allocator, which is shown in allocator.h.
-  virtual void set_allocator(mindspore::Allocator *allocator) = 0;
-
   /// \brief Get memory allocator of current MSTensor.
   ///
   /// \return Pointer of memory allocator class.
-  virtual mindspore::Allocator *allocator() const = 0;
+  virtual AllocatorPtr allocator() const = 0;

   /// \brief Get data type of the MindSpore Lite MSTensor.
   ///
```
```diff
@@ -41,8 +41,8 @@ class MTensor : public mindspore::tensor::MSTensor {
   MTensor(String name, TypeId type, Vector<int> shape) : tensor_name_(name), data_type_(type), shape_(shape) {}
   ~MTensor() override;

-  void set_allocator(mindspore::Allocator *allocator) override {}
-  mindspore::Allocator *allocator() const override { return nullptr; }
+  void set_allocator(AllocatorPtr allocator) override {}
+  AllocatorPtr allocator() const override { return nullptr; }
   TypeId data_type() const override { return data_type_; }
   void set_data_type(TypeId data_type) override { data_type_ = data_type; }
   Vector<int> shape() const override { return shape_; }
```
```diff
@@ -39,6 +39,8 @@ using mindspore::kernel::KernelKey;

 namespace mindspore::lite {
 namespace {
+const char *const kArchCPU = "CPU";
+const char *const kArchGPU = "GPU";
 void KernelKeyToKernelDesc(const KernelKey &key, kernel::KernelDesc *desc) {
   MS_ASSERT(desc != nullptr);
   desc->data_type = key.data_type;
@@ -158,7 +160,15 @@ int KernelRegistry::GetKernel(const std::vector<Tensor *> &in_tensors, const std
   if (base_kernel != nullptr) {
     auto *lite_kernel = new (std::nothrow) kernel::LiteKernel(base_kernel);
     if (lite_kernel != nullptr) {
-      lite_kernel->set_desc(key);
+      kernel::KernelKey tmp_key = key;
+      if (tmp_key.provider == kArchCPU) {
+        tmp_key.arch = kernel::kCPU;
+      } else if (tmp_key.provider == kArchGPU) {
+        tmp_key.arch = kernel::kGPU;
+      } else {
+        tmp_key.arch = kernel::kCustom;
+      }
+      lite_kernel->set_desc(tmp_key);
       *kernel = lite_kernel;
       return RET_OK;
     }
```
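The effect of the new mapping: registry kernels from the built-in "CPU"/"GPU" providers keep their native arch, and every other provider is stamped `kernel::kCustom`, which keeps those kernels grouped apart from built-in CPU/GPU kernels during scheduling. An illustrative key (the provider string is made up):

```cpp
kernel::KernelKey key;
key.provider = "VendorX";            // anything other than "CPU"/"GPU"
key.data_type = kNumberTypeFloat32;
// After GetKernel() wraps the provider kernel, desc().arch == kernel::kCustom,
// and the scheduler will later place it in a custom subgraph.
```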
```diff
@@ -36,7 +36,7 @@
 #include "src/inner_kernel.h"

 namespace mindspore::kernel {
-enum KERNEL_ARCH { kCPU, kGPU, kAPU, kNPU, kKernelArch_MIN = kCPU, kKernelArch_MAX = kNPU };
+enum KERNEL_ARCH { kCPU, kGPU, kAPU, kNPU, kCustom, kKernelArch_MIN = kCPU, kKernelArch_MAX = kNPU };
 static const char *const kBuiltin = "Builtin";

 struct KernelKey {
@@ -61,7 +61,15 @@ struct KernelKey {
   }
 };

-enum SubGraphType { kNotSubGraph = 0, kCpuFP32SubGraph, kCpuFP16SubGraph, kGpuSubGraph, kNpuSubGraph, kApuSubGraph };
+enum SubGraphType {
+  kNotSubGraph = 0,
+  kCpuFP32SubGraph,
+  kCpuFP16SubGraph,
+  kGpuSubGraph,
+  kNpuSubGraph,
+  kApuSubGraph,
+  kCustomSubGraph
+};

 class LiteKernel {
  public:
```
```diff
@@ -20,9 +20,6 @@

 namespace mindspore {
 namespace kernel {
-const char *const kArchCPU = "CPU";
-const char *const kArchGPU = "GPU";
-
 int RegisterKernel::RegCustomKernel(const std::string &arch, const std::string &provider, TypeId data_type,
                                     const std::string &type, CreateKernel creator) {
   return lite::RegistryKernelImpl::GetInstance()->RegCustomKernel(arch, provider, data_type, type, creator);
```
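For context, a hedged registration sketch. The exact `CreateKernel` signature lives in register_kernel.h and is not part of this diff, so the factory is left as a stand-in; the arch, provider, and op-type strings are illustrative:

```cpp
// CreateKernel is assumed to be a std::function-style factory type declared in
// register_kernel.h; a real vendor kernel factory would go here.
kernel::CreateKernel my_creator = nullptr;  // stand-in only

// Register op type "Custom_Conv" for provider "VendorX" on its own arch:
int ret = kernel::RegisterKernel::RegCustomKernel("VendorNPU", "VendorX",
                                                  kNumberTypeFloat32, "Custom_Conv", my_creator);
```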
```diff
@@ -128,7 +128,13 @@ kernel::CreateKernel RegistryKernelImpl::GetProviderCreator(const kernel::Kernel
     return nullptr;
   }
   for (auto &&item : kernel_creators_) {
+    if (item.first != desc.provider) {
+      continue;
+    }
     for (auto &&arch_item : item.second) {
+      if (arch_item.first != desc.arch) {
+        continue;
+      }
       creator = arch_item.second[index];
       if (creator != nullptr) {
         break;
```
```diff
@@ -129,7 +129,7 @@ int NPUInsertTransformPass::InsertNode(kernel::LiteKernel *kernel, kernel::LiteK
     MS_LOG(ERROR) << "New nchw tensor failed when inserting nchw2nhwc kernel.";
     return RET_ERROR;
   }
-  nh2nc_tensor->set_allocator(context_->allocator.get());
+  nh2nc_tensor->set_allocator(context_->allocator);
   nh2nc_tensor->set_tensor_name(nh2nc_name + "/output0");
   std::vector<Tensor *> nh2nc_tensors = {nh2nc_tensor};
   all_tensors_->push_back(nh2nc_tensors[0]);
@@ -140,7 +140,7 @@ int NPUInsertTransformPass::InsertNode(kernel::LiteKernel *kernel, kernel::LiteK
     MS_LOG(ERROR) << "New nhwc tensor failed when inserting nhwc2nchw kernel.";
     return RET_ERROR;
   }
-  nc2nh_tensor->set_allocator(context_->allocator.get());
+  nc2nh_tensor->set_allocator(context_->allocator);
   nc2nh_tensor->set_tensor_name(nc2nh_name + "/output0");
   std::vector<Tensor *> nc2nh_tensors = {nc2nh_tensor};
   all_tensors_->push_back(nc2nh_tensors[0]);
```
```diff
@@ -51,7 +51,7 @@ int NPUTransformPass::InsertPreNodes(kernel::LiteKernel *kernel, std::vector<ker
     MS_LOG(ERROR) << "New nchw tensor failed when inserting pre nhwc2nchw kernel.";
     return RET_ERROR;
   }
-  tensor->set_allocator(context_->allocator.get());
+  tensor->set_allocator(context_->allocator);
   auto name = kernel->name() + "_pre_trans" + "_Nhwc2Nchw_" + std::to_string(total++);
   tensor->set_tensor_name(name + "/output0");
   std::vector<Tensor *> pre_trans_out_tensors = {tensor};
@@ -112,7 +112,7 @@ int NPUTransformPass::InsertPostNodes(kernel::LiteKernel *kernel, std::vector<ke
     MS_LOG(ERROR) << "New nchw tensor failed when inserting post nchw2nhwc kernel.";
     return RET_ERROR;
   }
-  nc2nh_tensor->set_allocator(context_->allocator.get());
+  nc2nh_tensor->set_allocator(context_->allocator);
   all_tensors_->push_back(nc2nh_tensor);
   auto name = kernel->name() + "_post_trans" + "_Nchw2Nhwc" + std::to_string(total++);
   nc2nh_tensor->set_tensor_name(name + "/input0");
```
```diff
@@ -183,7 +183,7 @@ int SubGraphNpuKernel::BuildNPUOutputOp() {
   for (auto node : out_nodes_) {
     for (auto tensor : node->out_tensors()) {
       if (std::find(out_tensors.begin(), out_tensors.end(), tensor) != out_tensors.end()) {
-        tensor->set_allocator(Context()->allocator.get());
+        tensor->set_allocator(Context()->allocator);
         this->out_tensor_sorted_[i++] = tensor;
       }
     }
```
```diff
@@ -27,7 +27,7 @@
 namespace mindspore::lite::opencl {
 class OpenCLExecutor : public Executor {
  public:
-  OpenCLExecutor() : Executor() { allocator_ = ocl_runtime.GetInstance()->GetAllocator(); }
+  OpenCLExecutor() : Executor() { allocator_ = ocl_runtime.GetInstance()->GetAllocator().get(); }

   ~OpenCLExecutor() override = default;

```
```diff
@@ -286,7 +286,7 @@ int OpenCLRuntime::Init() {
     return ms_ret;
   }

-  allocator_ = new (std::nothrow) OpenCLAllocator(this);
+  allocator_ = std::make_shared<OpenCLAllocator>(this);
   if (allocator_ == nullptr) {
     delete device_;
     delete context_;
@@ -312,7 +312,6 @@ int OpenCLRuntime::Uninit() {
   }
   StoreCache();
   program_map_.clear();
-  delete allocator_;
   delete default_command_queue_;
   delete profiling_command_queue_;
   delete context_;
```
```diff
@@ -54,7 +54,7 @@ class OpenCLRuntime {

   cl::Context *Context();
   cl::Device *Device();
-  OpenCLAllocator *GetAllocator() { return allocator_; }
+  std::shared_ptr<OpenCLAllocator> GetAllocator() { return allocator_; }
   cl::CommandQueue *GetDefaultCommandQueue() { return profiling_ ? profiling_command_queue_ : default_command_queue_; }
   uint64_t DeviceGlobalMemoryCacheSize() const;
   int DeviceMaxWorkGroupSize() const;
@@ -174,7 +174,7 @@ class OpenCLRuntime {
   cl::CommandQueue *profiling_command_queue_{nullptr};
   cl::Context *context_{nullptr};
   cl::Device *device_{nullptr};
-  OpenCLAllocator *allocator_{nullptr};
+  std::shared_ptr<OpenCLAllocator> allocator_{nullptr};
   std::map<std::pair<std::string, std::string>, cl::Program> program_map_;
   cl::Program binary_program_;
   uint64_t global_memery_cachesize_{0};
```
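Ownership of the OpenCL allocator moves into a `shared_ptr` (note the matching removal of `delete allocator_` in `Uninit()` above). APIs that still take a raw `Allocator *` borrow the pointer at the call site; a sketch of the pattern, with illustrative names:

```cpp
void UseAllocator(lite::opencl::OpenCLRuntime *runtime) {
  // Shared handle: the runtime and this scope co-own the allocator.
  std::shared_ptr<lite::opencl::OpenCLAllocator> alloc = runtime->GetAllocator();
  // Raw-pointer APIs (e.g. Executor::Run in the subgraph changes below) borrow it:
  //   executor->Run(ins, outs, kernels, alloc.get());
  // alloc keeps the object alive for at least the duration of this function.
}
```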
```diff
@@ -17,6 +17,7 @@
 #include "src/runtime/kernel/opencl/opencl_subgraph.h"
 #include <set>
 #include <map>
+#include <memory>
 #include <string>
 #include <utility>
 #include "src/runtime/gpu/opencl/opencl_executor.h"
@@ -363,7 +364,7 @@ int OpenCLSubGraph::Prepare() {
   if (all_kernels_infer_done_) {
     auto opencl_exec = reinterpret_cast<lite::opencl::OpenCLExecutor *>(executor_);
     // If tuning_mode is DEFAULT, just malloc memory for reuse.
-    auto ret = opencl_exec->RunOrTune(in_tensors(), out_tensors(), nodes_, allocator_, nullptr, nullptr, true);
+    auto ret = opencl_exec->RunOrTune(in_tensors(), out_tensors(), nodes_, allocator_.get(), nullptr, nullptr, true);
     if (ret != RET_OK) {
       MS_LOG(ERROR) << "Run opencl executor failed: " << ret;
       return ret;
@@ -441,7 +442,7 @@ int OpenCLSubGraph::Execute() {
     }
   }

-  ret = executor_->Run(in_tensors(), out_tensors(), nodes_, allocator_);
+  ret = executor_->Run(in_tensors(), out_tensors(), nodes_, allocator_.get());
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "Run opencl executor failed: " << ret;
     return ret;
@@ -470,7 +471,7 @@ int OpenCLSubGraph::Execute(const KernelCallBack &before, const KernelCallBack &
     }
   }

-  ret = executor_->Run(in_tensors(), out_tensors(), nodes_, allocator_, before, after);
+  ret = executor_->Run(in_tensors(), out_tensors(), nodes_, allocator_.get(), before, after);
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "Run opencl executor failed: " << ret;
     return ret;
```
```diff
@@ -17,6 +17,7 @@
 #ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_OPENCL_KERNEL_SUBGRAPH_OPENCL_KERNEL_H_
 #define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_OPENCL_KERNEL_SUBGRAPH_OPENCL_KERNEL_H_

+#include <memory>
 #include <set>
 #include <vector>
 #include "src/runtime/kernel/opencl/opencl_kernel.h"
@@ -74,7 +75,7 @@ class OpenCLSubGraph : public SubGraphKernel {
   using PassFunc = int (OpenCLSubGraph::*)(void);

  private:
-  lite::opencl::OpenCLAllocator *allocator_{nullptr};
+  std::shared_ptr<lite::opencl::OpenCLAllocator> allocator_{nullptr};
   std::vector<lite::Tensor *> in_convert_tensors_;
   std::vector<lite::Tensor *> out_convert_tensors_;
   std::vector<OpenCLToFormatParameter *> in_parameters_;
```
```diff
@@ -57,6 +57,17 @@ using kernel::KERNEL_ARCH::kGPU;
 using kernel::KERNEL_ARCH::kNPU;
 namespace {
 constexpr int kMainSubGraphIndex = 0;
+kernel::SubGraphKernel *CreateCustomSubGraph(std::vector<kernel::LiteKernel *> &&input_kernels,
+                                             std::vector<kernel::LiteKernel *> &&output_kernels,
+                                             const std::vector<kernel::LiteKernel *> &kernels, kernel::Kernel *kernel) {
+  auto sub_kernel = new (std::nothrow) kernel::CustomSubGraph(input_kernels, output_kernels, kernels, kernel);
+  if (sub_kernel == nullptr) {
+    MS_LOG(ERROR) << "create custom subgraph failed!";
+    delete kernel;
+    return nullptr;
+  }
+  return sub_kernel;
+}
 }  // namespace

 int Scheduler::Schedule(std::vector<kernel::LiteKernel *> *dst_kernels) {
@@ -846,6 +857,9 @@ kernel::SubGraphKernel *Scheduler::CreateSubGraphKernel(const std::vector<kernel
   }
   std::vector<kernel::LiteKernel *> input_kernels = kernel::LiteKernelUtil::SubgraphInputNodes(kernels);
   std::vector<kernel::LiteKernel *> output_kernels = kernel::LiteKernelUtil::SubgraphOutputNodes(kernels);
+  if (type == kernel::kCustomSubGraph) {
+    return CreateCustomSubGraph(std::move(input_kernels), std::move(output_kernels), kernels, innerkernel);
+  }
   if (type == kernel::kGpuSubGraph) {
 #if GPU_OPENCL
     auto sub_kernel = new (std::nothrow) kernel::OpenCLSubGraph(input_kernels, output_kernels, kernels, innerkernel);
@@ -962,7 +976,11 @@ kernel::SubGraphType Scheduler::GetKernelSubGraphType(const kernel::LiteKernel *
   if (kernel == nullptr) {
     return kernel::kNotSubGraph;
   }
+
   auto desc = kernel->desc();
+  if (desc.provider != kernel::kBuiltin) {
+    return kernel::kCustomSubGraph;
+  }
   if (desc.arch == kernel::KERNEL_ARCH::kGPU) {
     return kernel::kGpuSubGraph;
   } else if (desc.arch == kernel::KERNEL_ARCH::kNPU) {
```
```diff
@@ -145,6 +145,52 @@ void SubGraphKernel::DropNode(LiteKernel *node) {
   lite::VectorErase(&out_nodes_, node);
 }

+int CustomSubGraph::Prepare() {
+  auto ret = SubGraphKernel::Prepare();
+  if (ret != RET_OK) {
+    return ret;
+  }
+  if (nodes_.size() < 1) {
+    return RET_OK;
+  }
+  auto provider = nodes_[0]->desc().provider;
+  auto context = this->Context();
+  AllocatorPtr allocator = nullptr;
+  auto iter = std::find_if(context->device_list_.begin(), context->device_list_.end(),
+                           [&provider](const auto &dev) { return dev.provider_ == provider; });
+  if (iter != context->device_list_.end()) {
+    allocator = iter->allocator_;
+  }
+
+  for (size_t i = 0; i < nodes_.size() - 1; ++i) {
+    auto node = nodes_[i];
+    for (auto tensor : node->out_tensors()) {
+      MS_ASSERT(tensor != nullptr);
+      tensor->set_allocator(allocator);
+    }
+  }
+
+  auto node = nodes_[nodes_.size() - 1];
+  for (auto tensor : node->out_tensors()) {
+    MS_ASSERT(tensor != nullptr);
+    tensor->set_allocator(this->Context()->allocator);
+  }
+  return RET_OK;
+}
+
+int CustomSubGraph::Execute(const KernelCallBack &before, const KernelCallBack &after) {
+  for (auto kernel : nodes_) {
+    MS_ASSERT(kernel != nullptr);
+    auto ret = kernel->Execute(before, after);
+    if (RET_OK != ret) {
+      MS_LOG(ERROR) << "run kernel failed, name: " << kernel->name();
+      return ret;
+    }
+  }
+
+  return RET_OK;
+}
+
 int CpuSubGraph::Prepare() {
   auto ret = SubGraphKernel::Prepare();
   if (ret != RET_OK) {
```
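`Prepare()` implements a simple allocator policy: every node except the last writes its outputs through the allocator of the device whose `provider_` matches the subgraph's kernels, while the last node's outputs use the context's default allocator so downstream built-in kernels can read them. A hedged sketch of the context setup that makes the `find_if` above match; `VendorAllocator` is a hypothetical `mindspore::Allocator` subclass and the provider name is illustrative:

```cpp
mindspore::lite::Context ctx;

mindspore::lite::DeviceContext dev;
dev.provider_ = "VendorX";  // must equal nodes_[0]->desc().provider at runtime
dev.allocator_ = std::make_shared<VendorAllocator>();  // hypothetical subclass
ctx.device_list_.push_back(dev);
```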
```diff
@@ -153,7 +199,7 @@ int CpuSubGraph::Prepare() {
   for (auto node : nodes_) {
     for (auto tensor : node->out_tensors()) {
       MS_ASSERT(tensor != nullptr);
-      tensor->set_allocator(this->Context()->allocator.get());
+      tensor->set_allocator(this->Context()->allocator);
     }
   }
   return RET_OK;
```
```diff
@@ -236,7 +282,7 @@ int CpuFp16SubGraph::Float32TensorToFloat16Tensor(lite::Tensor *tensor) {
     tensor->allocator()->SetRefCount(tensor->data_c(), tensor->allocator()->RefCount(float32_data));
   }
   auto *data_store =
-    DataStore::CreateDataStore(float32_data, own_data, tensor->allocator(), this->Context()->allocator.get());
+    DataStore::CreateDataStore(float32_data, own_data, tensor->allocator().get(), this->Context()->allocator.get());
   if (data_store == nullptr) {
     MS_LOG(ERROR) << "Create DataStore failed";
     return RET_ERROR;
```
```diff
@@ -229,5 +229,21 @@ class CpuFp16SubGraph : public CpuSubGraph {
   std::map<lite::Tensor *, DataStore *> origin_input_data_;
 };
 #endif
+
+class CustomSubGraph : public SubGraphKernel {
+ public:
+  CustomSubGraph(std::vector<LiteKernel *> in_kernels, std::vector<LiteKernel *> out_kernels,
+                 std::vector<LiteKernel *> nodes, Kernel *kernel)
+      : SubGraphKernel(std::move(in_kernels), std::move(out_kernels), std::move(nodes), kernel) {
+    subgraph_type_ = kCustomSubGraph;
+    desc_.arch = kernel::KERNEL_ARCH::kCustom;
+  }
+
+  ~CustomSubGraph() override { delete this->executor_; }
+  int Prepare() override;
+  int Init() override { return SubGraphKernel::Init(); }
+  int Execute() override { return Execute(nullptr, nullptr); }
+  int Execute(const KernelCallBack &before, const KernelCallBack &after) override;
+};
 }  // namespace mindspore::kernel
 #endif  // MINDSPORE_LITE_SRC_SUB_GRAPH_H
```
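The declaration makes the lifecycle explicit: `Init()` defers to the base class, `Prepare()` wires the allocators (shown earlier), and `Execute()` runs each node in order. A sketch of the call order as the session would drive it; the creation step is elided:

```cpp
kernel::SubGraphKernel *sub = /* Scheduler::CreateSubGraphKernel(..., kCustomSubGraph) */ nullptr;
if (sub != nullptr) {
  sub->Init();                     // forwards to SubGraphKernel::Init()
  sub->Prepare();                  // assigns provider/default allocators
  sub->Execute(nullptr, nullptr);  // runs nodes in order, no callbacks
}
```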
```diff
@@ -286,12 +286,12 @@ void Tensor::set_root_tensor(Tensor *tensor) {
     this->quant_clusters_ = this->root_tensor_->quant_clusters_;
   }

-int Tensor::MallocData(const mindspore::Allocator *allocator) {
+int Tensor::MallocData(const AllocatorPtr allocator) {
   if (nullptr != this->data_) {
     return RET_OK;
   }
   if (allocator != nullptr) {
-    allocator_ = const_cast<mindspore::Allocator *>(allocator);
+    allocator_ = allocator;
   }
   auto data_size = this->Size();
   if (data_size > kMaxMallocSize) {
```
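`MallocData` now shares ownership of the allocator it records, replacing the old `const_cast` to a raw pointer. A usage sketch with illustrative names:

```cpp
// The tensor keeps a shared reference to the allocator it allocates from, so
// the buffer can no longer outlive its allocator.
int AllocViaSharedAllocator(mindspore::lite::Tensor *tensor, AllocatorPtr alloc) {
  int ret = tensor->MallocData(alloc);  // nullptr keeps the tensor's current allocator_
  // On success, tensor->allocator() returns the same shared_ptr.
  return ret;
}
```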
```diff
@@ -103,11 +103,11 @@ class Tensor : public mindspore::tensor::MSTensor {

   size_t Size() const override;

-  void set_allocator(mindspore::Allocator *allocator) override { allocator_ = allocator; }
+  void set_allocator(AllocatorPtr allocator) override { allocator_ = allocator; }

-  mindspore::Allocator *allocator() const override { return this->allocator_; }
+  AllocatorPtr allocator() const override { return allocator_; }

-  virtual int MallocData(const mindspore::Allocator *allocator = nullptr);
+  virtual int MallocData(const AllocatorPtr allocator = nullptr);

   virtual void FreeData();

@@ -221,7 +221,7 @@ class Tensor : public mindspore::tensor::MSTensor {
   int init_ref_count_ = 0;
   std::vector<QuantArg> quant_params_;
   std::vector<float> quant_clusters_;
-  mindspore::Allocator *allocator_ = nullptr;
+  AllocatorPtr allocator_ = nullptr;
   Tensor *root_tensor_ = nullptr;
   bool own_data_{false};
 };
```
```diff
@@ -113,9 +113,9 @@ int TensorList::MallocTensorListData(TypeId dtype, const std::vector<std::vector
   return RET_OK;
 }

-int TensorList::MallocData(const mindspore::Allocator *allocator) {
+int TensorList::MallocData(const AllocatorPtr allocator) {
   if (allocator != nullptr) {
-    allocator_ = const_cast<mindspore::Allocator *>(allocator);
+    allocator_ = allocator;
   }
   // malloc data buf of each tensor in tensors_
   for (int i = 0; i < this->ElementsNum(); ++i) {
```
```diff
@@ -77,7 +77,7 @@ class TensorList : public Tensor {

   int MallocTensorListData(TypeId dtype, const std::vector<std::vector<int> > &tensor_shape);

-  int MallocData(const mindspore::Allocator *allocator = nullptr) override;
+  int MallocData(const AllocatorPtr allocator = nullptr) override;

   int FreeTensorListData();

```