forked from mindspore-Ecosystem/mindspore
!28408 [MS][LITE] dpico custom kernel memory optimize
Merge pull request !28408 from jianghui58/dpico_dev_mem
This commit is contained in:
commit
388e96c2ff
|
@ -147,7 +147,9 @@ CreateKernel RegisterKernel::GetCreator(const schema::Primitive *primitive, Kern
|
|||
return nullptr;
|
||||
}
|
||||
KernelDescHelper kernel_desc = {desc->data_type, desc->type, StringToChar(desc->arch), StringToChar(desc->provider)};
|
||||
return GetCreator(primitive, &kernel_desc);
|
||||
auto ret = GetCreator(primitive, &kernel_desc);
|
||||
desc->arch = CharToString(kernel_desc.arch);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/// \brief Defines the registration macro for ordinary op kernels, which is called directly by the user.
|
||||
|
|
|
@ -37,6 +37,9 @@
|
|||
#include "src/weight_decoder.h"
|
||||
#include "src/runtime/runtime_allocator.h"
|
||||
#include "src/lite_kernel_util.h"
|
||||
#ifndef CUSTOM_KERNEL_REGISTRY_CLIP
|
||||
#include "src/registry/register_kernel_impl.h"
|
||||
#endif
|
||||
#ifdef ENABLE_MINDRT
|
||||
#include "src/mindrt_executor.h"
|
||||
#endif
|
||||
|
@ -61,6 +64,9 @@ extern void common_log_init();
|
|||
#endif
|
||||
namespace lite {
|
||||
namespace {
|
||||
#ifndef CUSTOM_KERNEL_REGISTRY_CLIP
|
||||
const char *const kArchCPU = "CPU";
|
||||
#endif
|
||||
bool NeedBitUppackCheck(const SchemaTensorWrapper &src_tensor) {
|
||||
MS_ASSERT(src_tensor.handler() != nullptr);
|
||||
MS_ASSERT(src_tensor.data() != nullptr);
|
||||
|
@ -106,6 +112,23 @@ int DecompressTensor(const SchemaTensorWrapper &src_tensor, Tensor *dst_tensor)
|
|||
#endif
|
||||
}
|
||||
}
|
||||
#ifndef CUSTOM_KERNEL_REGISTRY_CLIP
|
||||
bool ExistCustomCpuKernel() {
|
||||
auto custom_kernel_creators = registry::RegistryKernelImpl::GetInstance()->GetCustomKernelCreators();
|
||||
for (const auto &custom_kernel_creator : custom_kernel_creators) { // <provider, <arch, <type, CreateKernel*>>>
|
||||
if (custom_kernel_creator.second.empty()) {
|
||||
continue;
|
||||
}
|
||||
if (std::any_of(custom_kernel_creator.second.begin(), custom_kernel_creator.second.end(),
|
||||
[](const std::pair<std::string, std::unordered_map<std::string, registry::CreateKernel *>> &pair) {
|
||||
return pair.first == kArchCPU && !pair.second.empty();
|
||||
})) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
#endif
|
||||
} // namespace
|
||||
|
||||
LiteSession::LiteSession() {
|
||||
|
@ -1377,6 +1400,11 @@ int LiteSession::RuntimeAllocatorInit() {
|
|||
if (RuntimeAllocatorValid() != RET_OK) {
|
||||
return RET_OK;
|
||||
}
|
||||
#ifndef CUSTOM_KERNEL_REGISTRY_CLIP
|
||||
if (ExistCustomCpuKernel()) {
|
||||
return RET_OK;
|
||||
}
|
||||
#endif
|
||||
if (runtime_allocator_ == nullptr) {
|
||||
runtime_allocator_ = std::shared_ptr<RuntimeAllocator>(new (std::nothrow) RuntimeAllocator());
|
||||
} else {
|
||||
|
|
|
@ -208,6 +208,9 @@ void MindrtExecutor::TransferGraphOutput() {
|
|||
reinterpret_cast<float *>(dst_tensor->data()), dst_tensor->ElementsNum());
|
||||
} else {
|
||||
#endif
|
||||
if (dst_tensor->allocator() != src_tensor->allocator()) {
|
||||
dst_tensor->set_allocator(src_tensor->allocator());
|
||||
}
|
||||
dst_tensor->set_data(src_tensor->data());
|
||||
if (IS_RUNTIME_ALLOCATOR(src_tensor->allocator()) == false) {
|
||||
src_tensor->set_data(nullptr);
|
||||
|
|
|
@ -50,7 +50,9 @@ CreateKernel RegisterKernel::GetCreator(const schema::Primitive *primitive, Kern
|
|||
return nullptr;
|
||||
}
|
||||
KernelDesc kernel_desc = {desc->data_type, desc->type, CharToString(desc->arch), CharToString(desc->provider)};
|
||||
return RegistryKernelImpl::GetInstance()->GetProviderCreator(primitive, &kernel_desc);
|
||||
auto ret = RegistryKernelImpl::GetInstance()->GetProviderCreator(primitive, &kernel_desc);
|
||||
desc->arch = StringToChar(kernel_desc.arch);
|
||||
return ret;
|
||||
#else
|
||||
MS_LOG(ERROR) << unsupport_custom_kernel_register_log;
|
||||
return nullptr;
|
||||
|
|
|
@ -48,6 +48,11 @@ class RegistryKernelImpl {
|
|||
return kernel_creators_;
|
||||
}
|
||||
|
||||
const std::map<std::string, std::map<std::string, std::unordered_map<std::string, registry::CreateKernel *>>>
|
||||
&GetCustomKernelCreators() const {
|
||||
return custom_kernel_creators_;
|
||||
}
|
||||
|
||||
protected:
|
||||
// keys:provider, arch
|
||||
std::map<std::string, std::unordered_map<std::string, registry::CreateKernel *>> kernel_creators_;
|
||||
|
|
|
@ -129,12 +129,15 @@ std::shared_ptr<Kernel> TestCustomAddCreator(const std::vector<MSTensor> &inputs
|
|||
std::shared_ptr<KernelInterface> CustomAddInferCreator() { return std::make_shared<TestCustomOpInfer>(); }
|
||||
} // namespace
|
||||
|
||||
REGISTER_CUSTOM_KERNEL(CPU, BuiltInTest, kFloat32, Add, TestCustomAddCreator)
|
||||
REGISTER_CUSTOM_KERNEL_INTERFACE(BuiltInTest, Add, CustomAddInferCreator)
|
||||
|
||||
class TestRegistryCustomOp : public mindspore::CommonTest {
|
||||
public:
|
||||
TestRegistryCustomOp() = default;
|
||||
void SetUp() override {
|
||||
static mindspore::registry::KernelReg g_CPUBuiltInTestkFloat32AddkernelReg("CPU", "BuiltInTest", kFloat32, "Add",
|
||||
TestCustomAddCreator);
|
||||
static mindspore::registry::KernelInterfaceReg g_BuiltInTestAdd_custom_inter_reg("BuiltInTest", "Add",
|
||||
CustomAddInferCreator);
|
||||
}
|
||||
};
|
||||
|
||||
TEST_F(TestRegistryCustomOp, TestCustomAdd) {
|
||||
|
|
|
@ -106,6 +106,7 @@ set(LITE_SRC ${API_SRC}
|
|||
${SRC_DIR}/ms_tensor.cc
|
||||
${SRC_DIR}/tensorlist.cc
|
||||
${SRC_DIR}/kernel_registry.cc
|
||||
${SRC_DIR}/registry/register_kernel_impl.cc
|
||||
${SRC_DIR}/inner_kernel.cc
|
||||
${SRC_DIR}/lite_kernel.cc
|
||||
${SRC_DIR}/lite_kernel_util.cc
|
||||
|
|
Loading…
Reference in New Issue