diff --git a/mindspore/lite/include/context.h b/mindspore/lite/include/context.h index f8637eddd35..0d736e951c5 100644 --- a/mindspore/lite/include/context.h +++ b/mindspore/lite/include/context.h @@ -51,6 +51,7 @@ struct DeviceContext { DeviceInfo device_info_; std::string provider_{}; std::string provider_device_{}; + AllocatorPtr allocator_ = nullptr; }; /// \brief Context defined for holding environment variables during runtime. diff --git a/mindspore/lite/include/ms_tensor.h b/mindspore/lite/include/ms_tensor.h index ab1f4aa46e1..9cdaee185f5 100644 --- a/mindspore/lite/include/ms_tensor.h +++ b/mindspore/lite/include/ms_tensor.h @@ -42,12 +42,12 @@ class MS_API MSTensor { /// \brief Set memory allocator for current MSTensor. /// /// \param[in] allocator Define memory allocator, which is shown in allocator.h. - virtual void set_allocator(mindspore::Allocator *allocator) = 0; + virtual void set_allocator(AllocatorPtr allocator) = 0; /// \brief Get memory allocator of current MSTensor. /// /// \return Pointer of memory allocator class. - virtual mindspore::Allocator *allocator() const = 0; + virtual AllocatorPtr allocator() const = 0; /// \brief Get data type of the MindSpore Lite MSTensor. /// diff --git a/mindspore/lite/include/registry/register_kernel.h b/mindspore/lite/include/registry/register_kernel.h index 9be1ca37c28..d879785c349 100644 --- a/mindspore/lite/include/registry/register_kernel.h +++ b/mindspore/lite/include/registry/register_kernel.h @@ -28,9 +28,6 @@ namespace mindspore { namespace kernel { -extern const char *const kArchCPU; -extern const char *const kArchGPU; - /// \brief KernelDesc defined kernel's basic attribute. struct MS_API KernelDesc { TypeId data_type; /**< kernel data type argument */ diff --git a/mindspore/lite/micro/coder/generator/component/const_blocks/mtensor.cc b/mindspore/lite/micro/coder/generator/component/const_blocks/mtensor.cc index 5b66ef50ed7..8cc48318587 100644 --- a/mindspore/lite/micro/coder/generator/component/const_blocks/mtensor.cc +++ b/mindspore/lite/micro/coder/generator/component/const_blocks/mtensor.cc @@ -60,8 +60,8 @@ class MTensor : public mindspore::tensor::MSTensor { MTensor(String name, TypeId type, Vector shape) : tensor_name_(name), data_type_(type), shape_(shape) {} ~MTensor() override; - void set_allocator(mindspore::Allocator *allocator) override {} - mindspore::Allocator *allocator() const override { return nullptr; } + void set_allocator(AllocatorPtr allocator) override {} + AllocatorPtr allocator() const override { return nullptr; } TypeId data_type() const override { return data_type_; } void set_data_type(TypeId data_type) override { data_type_ = data_type; } Vector shape() const override { return shape_; } diff --git a/mindspore/lite/micro/example/mnist_stm32f746/mnist_stm32f746/include/ms_tensor.h b/mindspore/lite/micro/example/mnist_stm32f746/mnist_stm32f746/include/ms_tensor.h index ab1f4aa46e1..e64bc3fd725 100755 --- a/mindspore/lite/micro/example/mnist_stm32f746/mnist_stm32f746/include/ms_tensor.h +++ b/mindspore/lite/micro/example/mnist_stm32f746/mnist_stm32f746/include/ms_tensor.h @@ -39,15 +39,10 @@ class MS_API MSTensor { static MSTensor *CreateTensor(const String &name, TypeId type, const Vector &shape, const void *data, size_t data_len); - /// \brief Set memory allocator for current MSTensor. - /// - /// \param[in] allocator Define memory allocator, which is shown in allocator.h. - virtual void set_allocator(mindspore::Allocator *allocator) = 0; - /// \brief Get memory allocator of current MSTensor. /// /// \return Pointer of memory allocator class. - virtual mindspore::Allocator *allocator() const = 0; + virtual AllocatorPtr allocator() const = 0; /// \brief Get data type of the MindSpore Lite MSTensor. /// diff --git a/mindspore/lite/micro/example/mnist_x86/src/tensor.h b/mindspore/lite/micro/example/mnist_x86/src/tensor.h index 9aa3f4bd424..caaa99c301d 100644 --- a/mindspore/lite/micro/example/mnist_x86/src/tensor.h +++ b/mindspore/lite/micro/example/mnist_x86/src/tensor.h @@ -41,8 +41,8 @@ class MTensor : public mindspore::tensor::MSTensor { MTensor(String name, TypeId type, Vector shape) : tensor_name_(name), data_type_(type), shape_(shape) {} ~MTensor() override; - void set_allocator(mindspore::Allocator *allocator) override {} - mindspore::Allocator *allocator() const override { return nullptr; } + void set_allocator(AllocatorPtr allocator) override {} + AllocatorPtr allocator() const override { return nullptr; } TypeId data_type() const override { return data_type_; } void set_data_type(TypeId data_type) override { data_type_ = data_type; } Vector shape() const override { return shape_; } diff --git a/mindspore/lite/src/kernel_registry.cc b/mindspore/lite/src/kernel_registry.cc index 5b12cf05d76..05a21fe5db4 100644 --- a/mindspore/lite/src/kernel_registry.cc +++ b/mindspore/lite/src/kernel_registry.cc @@ -39,6 +39,8 @@ using mindspore::kernel::KernelKey; namespace mindspore::lite { namespace { +const char *const kArchCPU = "CPU"; +const char *const kArchGPU = "GPU"; void KernelKeyToKernelDesc(const KernelKey &key, kernel::KernelDesc *desc) { MS_ASSERT(desc != nullptr); desc->data_type = key.data_type; @@ -158,7 +160,15 @@ int KernelRegistry::GetKernel(const std::vector &in_tensors, const std if (base_kernel != nullptr) { auto *lite_kernel = new (std::nothrow) kernel::LiteKernel(base_kernel); if (lite_kernel != nullptr) { - lite_kernel->set_desc(key); + kernel::KernelKey tmp_key = key; + if (tmp_key.provider == kArchCPU) { + tmp_key.arch = kernel::kCPU; + } else if (tmp_key.provider == kArchGPU) { + tmp_key.arch = kernel::kGPU; + } else { + tmp_key.arch = kernel::kCustom; + } + lite_kernel->set_desc(tmp_key); *kernel = lite_kernel; return RET_OK; } diff --git a/mindspore/lite/src/lite_kernel.h b/mindspore/lite/src/lite_kernel.h index aed1f17490b..5c7d92df78c 100644 --- a/mindspore/lite/src/lite_kernel.h +++ b/mindspore/lite/src/lite_kernel.h @@ -36,7 +36,7 @@ #include "src/inner_kernel.h" namespace mindspore::kernel { -enum KERNEL_ARCH { kCPU, kGPU, kAPU, kNPU, kKernelArch_MIN = kCPU, kKernelArch_MAX = kNPU }; +enum KERNEL_ARCH { kCPU, kGPU, kAPU, kNPU, kCustom, kKernelArch_MIN = kCPU, kKernelArch_MAX = kNPU }; static const char *const kBuiltin = "Builtin"; struct KernelKey { @@ -61,7 +61,15 @@ struct KernelKey { } }; -enum SubGraphType { kNotSubGraph = 0, kCpuFP32SubGraph, kCpuFP16SubGraph, kGpuSubGraph, kNpuSubGraph, kApuSubGraph }; +enum SubGraphType { + kNotSubGraph = 0, + kCpuFP32SubGraph, + kCpuFP16SubGraph, + kGpuSubGraph, + kNpuSubGraph, + kApuSubGraph, + kCustomSubGraph +}; class LiteKernel { public: diff --git a/mindspore/lite/src/registry/register_kernel.cc b/mindspore/lite/src/registry/register_kernel.cc index 895988f5060..8d2ea5657bc 100644 --- a/mindspore/lite/src/registry/register_kernel.cc +++ b/mindspore/lite/src/registry/register_kernel.cc @@ -20,9 +20,6 @@ namespace mindspore { namespace kernel { -const char *const kArchCPU = "CPU"; -const char *const kArchGPU = "GPU"; - int RegisterKernel::RegCustomKernel(const std::string &arch, const std::string &provider, TypeId data_type, const std::string &type, CreateKernel creator) { return lite::RegistryKernelImpl::GetInstance()->RegCustomKernel(arch, provider, data_type, type, creator); diff --git a/mindspore/lite/src/registry/register_kernel_impl.cc b/mindspore/lite/src/registry/register_kernel_impl.cc index 1484fb80b2c..0f53ca7053f 100644 --- a/mindspore/lite/src/registry/register_kernel_impl.cc +++ b/mindspore/lite/src/registry/register_kernel_impl.cc @@ -128,7 +128,13 @@ kernel::CreateKernel RegistryKernelImpl::GetProviderCreator(const kernel::Kernel return nullptr; } for (auto &&item : kernel_creators_) { + if (item.first != desc.provider) { + continue; + } for (auto &&arch_item : item.second) { + if (arch_item.first != desc.arch) { + continue; + } creator = arch_item.second[index]; if (creator != nullptr) { break; diff --git a/mindspore/lite/src/runtime/agent/npu/optimizer/npu_insert_transform_pass.cc b/mindspore/lite/src/runtime/agent/npu/optimizer/npu_insert_transform_pass.cc index d3d9c9e8938..6799798870d 100644 --- a/mindspore/lite/src/runtime/agent/npu/optimizer/npu_insert_transform_pass.cc +++ b/mindspore/lite/src/runtime/agent/npu/optimizer/npu_insert_transform_pass.cc @@ -129,7 +129,7 @@ int NPUInsertTransformPass::InsertNode(kernel::LiteKernel *kernel, kernel::LiteK MS_LOG(ERROR) << "New nchw tensor failed when inserting nchw2nhwc kernel."; return RET_ERROR; } - nh2nc_tensor->set_allocator(context_->allocator.get()); + nh2nc_tensor->set_allocator(context_->allocator); nh2nc_tensor->set_tensor_name(nh2nc_name + "/output0"); std::vector nh2nc_tensors = {nh2nc_tensor}; all_tensors_->push_back(nh2nc_tensors[0]); @@ -140,7 +140,7 @@ int NPUInsertTransformPass::InsertNode(kernel::LiteKernel *kernel, kernel::LiteK MS_LOG(ERROR) << "New nhwc tensor failed when inserting nhwc2nchw kernel."; return RET_ERROR; } - nc2nh_tensor->set_allocator(context_->allocator.get()); + nc2nh_tensor->set_allocator(context_->allocator); nc2nh_tensor->set_tensor_name(nc2nh_name + "/output0"); std::vector nc2nh_tensors = {nc2nh_tensor}; all_tensors_->push_back(nc2nh_tensors[0]); diff --git a/mindspore/lite/src/runtime/agent/npu/optimizer/npu_transform_pass.cc b/mindspore/lite/src/runtime/agent/npu/optimizer/npu_transform_pass.cc index c6bab39a229..44c33d7c97f 100644 --- a/mindspore/lite/src/runtime/agent/npu/optimizer/npu_transform_pass.cc +++ b/mindspore/lite/src/runtime/agent/npu/optimizer/npu_transform_pass.cc @@ -51,7 +51,7 @@ int NPUTransformPass::InsertPreNodes(kernel::LiteKernel *kernel, std::vectorset_allocator(context_->allocator.get()); + tensor->set_allocator(context_->allocator); auto name = kernel->name() + "_pre_trans" + "_Nhwc2Nchw_" + std::to_string(total++); tensor->set_tensor_name(name + "/output0"); std::vector pre_trans_out_tensors = {tensor}; @@ -112,7 +112,7 @@ int NPUTransformPass::InsertPostNodes(kernel::LiteKernel *kernel, std::vectorset_allocator(context_->allocator.get()); + nc2nh_tensor->set_allocator(context_->allocator); all_tensors_->push_back(nc2nh_tensor); auto name = kernel->name() + "_post_trans" + "_Nchw2Nhwc" + std::to_string(total++); nc2nh_tensor->set_tensor_name(name + "/input0"); diff --git a/mindspore/lite/src/runtime/agent/npu/subgraph_npu_kernel.cc b/mindspore/lite/src/runtime/agent/npu/subgraph_npu_kernel.cc index 17ddeca8034..991057dccb1 100644 --- a/mindspore/lite/src/runtime/agent/npu/subgraph_npu_kernel.cc +++ b/mindspore/lite/src/runtime/agent/npu/subgraph_npu_kernel.cc @@ -183,7 +183,7 @@ int SubGraphNpuKernel::BuildNPUOutputOp() { for (auto node : out_nodes_) { for (auto tensor : node->out_tensors()) { if (std::find(out_tensors.begin(), out_tensors.end(), tensor) != out_tensors.end()) { - tensor->set_allocator(Context()->allocator.get()); + tensor->set_allocator(Context()->allocator); this->out_tensor_sorted_[i++] = tensor; } } diff --git a/mindspore/lite/src/runtime/gpu/opencl/opencl_executor.h b/mindspore/lite/src/runtime/gpu/opencl/opencl_executor.h index fa96c1c5463..1eda4207747 100644 --- a/mindspore/lite/src/runtime/gpu/opencl/opencl_executor.h +++ b/mindspore/lite/src/runtime/gpu/opencl/opencl_executor.h @@ -27,7 +27,7 @@ namespace mindspore::lite::opencl { class OpenCLExecutor : public Executor { public: - OpenCLExecutor() : Executor() { allocator_ = ocl_runtime.GetInstance()->GetAllocator(); } + OpenCLExecutor() : Executor() { allocator_ = ocl_runtime.GetInstance()->GetAllocator().get(); } ~OpenCLExecutor() override = default; diff --git a/mindspore/lite/src/runtime/gpu/opencl/opencl_runtime.cc b/mindspore/lite/src/runtime/gpu/opencl/opencl_runtime.cc index 98a5bf6473b..80ddc16ebf0 100644 --- a/mindspore/lite/src/runtime/gpu/opencl/opencl_runtime.cc +++ b/mindspore/lite/src/runtime/gpu/opencl/opencl_runtime.cc @@ -286,7 +286,7 @@ int OpenCLRuntime::Init() { return ms_ret; } - allocator_ = new (std::nothrow) OpenCLAllocator(this); + allocator_ = std::make_shared(this); if (allocator_ == nullptr) { delete device_; delete context_; @@ -312,7 +312,6 @@ int OpenCLRuntime::Uninit() { } StoreCache(); program_map_.clear(); - delete allocator_; delete default_command_queue_; delete profiling_command_queue_; delete context_; diff --git a/mindspore/lite/src/runtime/gpu/opencl/opencl_runtime.h b/mindspore/lite/src/runtime/gpu/opencl/opencl_runtime.h index 7adb8abb08c..88676cda0b0 100644 --- a/mindspore/lite/src/runtime/gpu/opencl/opencl_runtime.h +++ b/mindspore/lite/src/runtime/gpu/opencl/opencl_runtime.h @@ -54,7 +54,7 @@ class OpenCLRuntime { cl::Context *Context(); cl::Device *Device(); - OpenCLAllocator *GetAllocator() { return allocator_; } + std::shared_ptr GetAllocator() { return allocator_; } cl::CommandQueue *GetDefaultCommandQueue() { return profiling_ ? profiling_command_queue_ : default_command_queue_; } uint64_t DeviceGlobalMemoryCacheSize() const; int DeviceMaxWorkGroupSize() const; @@ -174,7 +174,7 @@ class OpenCLRuntime { cl::CommandQueue *profiling_command_queue_{nullptr}; cl::Context *context_{nullptr}; cl::Device *device_{nullptr}; - OpenCLAllocator *allocator_{nullptr}; + std::shared_ptr allocator_{nullptr}; std::map, cl::Program> program_map_; cl::Program binary_program_; uint64_t global_memery_cachesize_{0}; diff --git a/mindspore/lite/src/runtime/kernel/opencl/opencl_subgraph.cc b/mindspore/lite/src/runtime/kernel/opencl/opencl_subgraph.cc index b0affac4d41..f1dd1a2e88a 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/opencl_subgraph.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/opencl_subgraph.cc @@ -17,6 +17,7 @@ #include "src/runtime/kernel/opencl/opencl_subgraph.h" #include #include +#include #include #include #include "src/runtime/gpu/opencl/opencl_executor.h" @@ -363,7 +364,7 @@ int OpenCLSubGraph::Prepare() { if (all_kernels_infer_done_) { auto opencl_exec = reinterpret_cast(executor_); // If tuning_mode is DEFAULT, just malloc memory for reuse. - auto ret = opencl_exec->RunOrTune(in_tensors(), out_tensors(), nodes_, allocator_, nullptr, nullptr, true); + auto ret = opencl_exec->RunOrTune(in_tensors(), out_tensors(), nodes_, allocator_.get(), nullptr, nullptr, true); if (ret != RET_OK) { MS_LOG(ERROR) << "Run opencl executor failed: " << ret; return ret; @@ -441,7 +442,7 @@ int OpenCLSubGraph::Execute() { } } - ret = executor_->Run(in_tensors(), out_tensors(), nodes_, allocator_); + ret = executor_->Run(in_tensors(), out_tensors(), nodes_, allocator_.get()); if (ret != RET_OK) { MS_LOG(ERROR) << "Run opencl executor failed: " << ret; return ret; @@ -470,7 +471,7 @@ int OpenCLSubGraph::Execute(const KernelCallBack &before, const KernelCallBack & } } - ret = executor_->Run(in_tensors(), out_tensors(), nodes_, allocator_, before, after); + ret = executor_->Run(in_tensors(), out_tensors(), nodes_, allocator_.get(), before, after); if (ret != RET_OK) { MS_LOG(ERROR) << "Run opencl executor failed: " << ret; return ret; diff --git a/mindspore/lite/src/runtime/kernel/opencl/opencl_subgraph.h b/mindspore/lite/src/runtime/kernel/opencl/opencl_subgraph.h index d2b63ae13dc..1606eecb489 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/opencl_subgraph.h +++ b/mindspore/lite/src/runtime/kernel/opencl/opencl_subgraph.h @@ -17,6 +17,7 @@ #ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_OPENCL_KERNEL_SUBGRAPH_OPENCL_KERNEL_H_ #define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_OPENCL_KERNEL_SUBGRAPH_OPENCL_KERNEL_H_ +#include #include #include #include "src/runtime/kernel/opencl/opencl_kernel.h" @@ -74,7 +75,7 @@ class OpenCLSubGraph : public SubGraphKernel { using PassFunc = int (OpenCLSubGraph::*)(void); private: - lite::opencl::OpenCLAllocator *allocator_{nullptr}; + std::shared_ptr allocator_{nullptr}; std::vector in_convert_tensors_; std::vector out_convert_tensors_; std::vector in_parameters_; diff --git a/mindspore/lite/src/scheduler.cc b/mindspore/lite/src/scheduler.cc index 6bc9b44faf4..b73374fa7f5 100644 --- a/mindspore/lite/src/scheduler.cc +++ b/mindspore/lite/src/scheduler.cc @@ -57,6 +57,17 @@ using kernel::KERNEL_ARCH::kGPU; using kernel::KERNEL_ARCH::kNPU; namespace { constexpr int kMainSubGraphIndex = 0; +kernel::SubGraphKernel *CreateCustomSubGraph(std::vector &&input_kernels, + std::vector &&output_kernels, + const std::vector &kernels, kernel::Kernel *kernel) { + auto sub_kernel = new (std::nothrow) kernel::CustomSubGraph(input_kernels, output_kernels, kernels, kernel); + if (sub_kernel == nullptr) { + MS_LOG(ERROR) << "create custom subgraph failed!"; + delete kernel; + return nullptr; + } + return sub_kernel; +} } // namespace int Scheduler::Schedule(std::vector *dst_kernels) { @@ -846,6 +857,9 @@ kernel::SubGraphKernel *Scheduler::CreateSubGraphKernel(const std::vector input_kernels = kernel::LiteKernelUtil::SubgraphInputNodes(kernels); std::vector output_kernels = kernel::LiteKernelUtil::SubgraphOutputNodes(kernels); + if (type == kernel::kCustomSubGraph) { + return CreateCustomSubGraph(std::move(input_kernels), std::move(output_kernels), kernels, innerkernel); + } if (type == kernel::kGpuSubGraph) { #if GPU_OPENCL auto sub_kernel = new (std::nothrow) kernel::OpenCLSubGraph(input_kernels, output_kernels, kernels, innerkernel); @@ -962,7 +976,11 @@ kernel::SubGraphType Scheduler::GetKernelSubGraphType(const kernel::LiteKernel * if (kernel == nullptr) { return kernel::kNotSubGraph; } + auto desc = kernel->desc(); + if (desc.provider != kernel::kBuiltin) { + return kernel::kCustomSubGraph; + } if (desc.arch == kernel::KERNEL_ARCH::kGPU) { return kernel::kGpuSubGraph; } else if (desc.arch == kernel::KERNEL_ARCH::kNPU) { diff --git a/mindspore/lite/src/sub_graph_kernel.cc b/mindspore/lite/src/sub_graph_kernel.cc index ff8623b4473..45a57ed522b 100644 --- a/mindspore/lite/src/sub_graph_kernel.cc +++ b/mindspore/lite/src/sub_graph_kernel.cc @@ -145,6 +145,52 @@ void SubGraphKernel::DropNode(LiteKernel *node) { lite::VectorErase(&out_nodes_, node); } +int CustomSubGraph::Prepare() { + auto ret = SubGraphKernel::Prepare(); + if (ret != RET_OK) { + return ret; + } + if (nodes_.size() < 1) { + return RET_OK; + } + auto provider = nodes_[0]->desc().provider; + auto context = this->Context(); + AllocatorPtr allocator = nullptr; + auto iter = std::find_if(context->device_list_.begin(), context->device_list_.end(), + [&provider](const auto &dev) { return dev.provider_ == provider; }); + if (iter != context->device_list_.end()) { + allocator = iter->allocator_; + } + + for (size_t i = 0; i < nodes_.size() - 1; ++i) { + auto node = nodes_[i]; + for (auto tensor : node->out_tensors()) { + MS_ASSERT(tensor != nullptr); + tensor->set_allocator(allocator); + } + } + + auto node = nodes_[nodes_.size() - 1]; + for (auto tensor : node->out_tensors()) { + MS_ASSERT(tensor != nullptr); + tensor->set_allocator(this->Context()->allocator); + } + return RET_OK; +} + +int CustomSubGraph::Execute(const KernelCallBack &before, const KernelCallBack &after) { + for (auto kernel : nodes_) { + MS_ASSERT(kernel != nullptr); + auto ret = kernel->Execute(before, after); + if (RET_OK != ret) { + MS_LOG(ERROR) << "run kernel failed, name: " << kernel->name(); + return ret; + } + } + + return RET_OK; +} + int CpuSubGraph::Prepare() { auto ret = SubGraphKernel::Prepare(); if (ret != RET_OK) { @@ -153,7 +199,7 @@ int CpuSubGraph::Prepare() { for (auto node : nodes_) { for (auto tensor : node->out_tensors()) { MS_ASSERT(tensor != nullptr); - tensor->set_allocator(this->Context()->allocator.get()); + tensor->set_allocator(this->Context()->allocator); } } return RET_OK; @@ -236,7 +282,7 @@ int CpuFp16SubGraph::Float32TensorToFloat16Tensor(lite::Tensor *tensor) { tensor->allocator()->SetRefCount(tensor->data_c(), tensor->allocator()->RefCount(float32_data)); } auto *data_store = - DataStore::CreateDataStore(float32_data, own_data, tensor->allocator(), this->Context()->allocator.get()); + DataStore::CreateDataStore(float32_data, own_data, tensor->allocator().get(), this->Context()->allocator.get()); if (data_store == nullptr) { MS_LOG(ERROR) << "Create DataStore failed"; return RET_ERROR; diff --git a/mindspore/lite/src/sub_graph_kernel.h b/mindspore/lite/src/sub_graph_kernel.h index f77bfa4d0bc..a117f26a527 100644 --- a/mindspore/lite/src/sub_graph_kernel.h +++ b/mindspore/lite/src/sub_graph_kernel.h @@ -229,5 +229,21 @@ class CpuFp16SubGraph : public CpuSubGraph { std::map origin_input_data_; }; #endif + +class CustomSubGraph : public SubGraphKernel { + public: + CustomSubGraph(std::vector in_kernels, std::vector out_kernels, + std::vector nodes, Kernel *kernel) + : SubGraphKernel(std::move(in_kernels), std::move(out_kernels), std::move(nodes), kernel) { + subgraph_type_ = kCustomSubGraph; + desc_.arch = kernel::KERNEL_ARCH::kCustom; + } + + ~CustomSubGraph() override { delete this->executor_; } + int Prepare() override; + int Init() override { return SubGraphKernel::Init(); } + int Execute() override { return Execute(nullptr, nullptr); } + int Execute(const KernelCallBack &before, const KernelCallBack &after) override; +}; } // namespace mindspore::kernel #endif // MINDSPORE_LITE_SRC_SUB_GRAPH_H diff --git a/mindspore/lite/src/tensor.cc b/mindspore/lite/src/tensor.cc index 9443980915d..49af949ad7c 100644 --- a/mindspore/lite/src/tensor.cc +++ b/mindspore/lite/src/tensor.cc @@ -286,12 +286,12 @@ void Tensor::set_root_tensor(Tensor *tensor) { this->quant_clusters_ = this->root_tensor_->quant_clusters_; } -int Tensor::MallocData(const mindspore::Allocator *allocator) { +int Tensor::MallocData(const AllocatorPtr allocator) { if (nullptr != this->data_) { return RET_OK; } if (allocator != nullptr) { - allocator_ = const_cast(allocator); + allocator_ = allocator; } auto data_size = this->Size(); if (data_size > kMaxMallocSize) { diff --git a/mindspore/lite/src/tensor.h b/mindspore/lite/src/tensor.h index 6c7b0e7ea0e..f26fe4f875c 100644 --- a/mindspore/lite/src/tensor.h +++ b/mindspore/lite/src/tensor.h @@ -103,11 +103,11 @@ class Tensor : public mindspore::tensor::MSTensor { size_t Size() const override; - void set_allocator(mindspore::Allocator *allocator) override { allocator_ = allocator; } + void set_allocator(AllocatorPtr allocator) override { allocator_ = allocator; } - mindspore::Allocator *allocator() const override { return this->allocator_; } + AllocatorPtr allocator() const override { return allocator_; } - virtual int MallocData(const mindspore::Allocator *allocator = nullptr); + virtual int MallocData(const AllocatorPtr allocator = nullptr); virtual void FreeData(); @@ -221,7 +221,7 @@ class Tensor : public mindspore::tensor::MSTensor { int init_ref_count_ = 0; std::vector quant_params_; std::vector quant_clusters_; - mindspore::Allocator *allocator_ = nullptr; + AllocatorPtr allocator_ = nullptr; Tensor *root_tensor_ = nullptr; bool own_data_{false}; }; diff --git a/mindspore/lite/src/tensorlist.cc b/mindspore/lite/src/tensorlist.cc index fe397757239..74cf64f9ddb 100644 --- a/mindspore/lite/src/tensorlist.cc +++ b/mindspore/lite/src/tensorlist.cc @@ -113,9 +113,9 @@ int TensorList::MallocTensorListData(TypeId dtype, const std::vector(allocator); + allocator_ = allocator; } // malloc data buf of each tensor in tensors_ for (int i = 0; i < this->ElementsNum(); ++i) { diff --git a/mindspore/lite/src/tensorlist.h b/mindspore/lite/src/tensorlist.h index d5e25924e03..8a51814392f 100644 --- a/mindspore/lite/src/tensorlist.h +++ b/mindspore/lite/src/tensorlist.h @@ -77,7 +77,7 @@ class TensorList : public Tensor { int MallocTensorListData(TypeId dtype, const std::vector > &tensor_shape); - int MallocData(const mindspore::Allocator *allocator = nullptr) override; + int MallocData(const AllocatorPtr allocator = nullptr) override; int FreeTensorListData();