forked from mindspore-Ecosystem/mindspore
!37274 support ascend cloud infer
Merge pull request !37274 from zhengyuanhua/br3
This commit is contained in:
commit
69aa258eb2
|
@ -84,7 +84,7 @@ std::string OpAdapterImpl::GetCustomOpType(const PrimitivePtr &prim) const {
|
|||
MS_EXCEPTION_IF_NULL(prim);
|
||||
auto value = prim->GetAttr("reg_op_name");
|
||||
if (value == nullptr) {
|
||||
MS_LOG(ERROR) << "Custom op has no func_type attr.";
|
||||
MS_LOG(ERROR) << "Custom op has no reg_op_name attr.";
|
||||
return "";
|
||||
}
|
||||
auto op_type = GetValue<std::string>(value);
|
||||
|
|
|
@ -1441,6 +1441,10 @@ bool MSANFModelParser::BuildAttrForFuncGraph(const FuncGraphPtr &outputFuncGraph
|
|||
outputFuncGraph->set_attr(attr_proto.name(), ParseAttrInSingleScalar_int32_t_bool(attr_proto));
|
||||
break;
|
||||
}
|
||||
case mind_ir::AttributeProto_AttributeType_INT32: {
|
||||
outputFuncGraph->set_attr(attr_proto.name(), ParseAttrInSingleScalar_int32_t_int32_t(attr_proto));
|
||||
break;
|
||||
}
|
||||
default:
|
||||
MS_LOG(ERROR) << "Obtain attr for graph has not support input type: " << attr_type << "!";
|
||||
return false;
|
||||
|
|
|
@ -202,6 +202,13 @@ if(DEFINED ENV{MSLITE_ENABLE_CLOUD_FUSION_INFERENCE})
|
|||
set(MSLITE_ENABLE_CLOUD_FUSION_INFERENCE $ENV{MSLITE_ENABLE_CLOUD_FUSION_INFERENCE})
|
||||
endif()
|
||||
|
||||
if(MSLITE_ENABLE_ACL AND MSLITE_ENABLE_CLOUD_FUSION_INFERENCE)
|
||||
set(PLATFORM_ARM64 off)
|
||||
set(PLATFORM_ARM32 off)
|
||||
set(MSLITE_ENABLE_FP16 off)
|
||||
set(ENABLE_NEON off)
|
||||
endif()
|
||||
|
||||
if(MACHINE_LINUX_ARM64)
|
||||
add_compile_definitions(MACHINE_LINUX_ARM64)
|
||||
add_compile_definitions(LINUX_RUNTIME)
|
||||
|
|
|
@ -31,7 +31,13 @@ if(MSLITE_DEPS_OPENCV)
|
|||
endif()
|
||||
|
||||
if(MSLITE_DEPS_MKLDNN)
|
||||
include(${TOP_DIR}/cmake/external_libs/mkl_dnn.cmake)
|
||||
if(CMAKE_SYSTEM_NAME MATCHES "Linux")
|
||||
set(USE_MS_THREADPOOL_FOR_DNNL ON)
|
||||
endif()
|
||||
if(USE_MS_THREADPOOL_FOR_DNNL)
|
||||
add_compile_definitions(USE_MS_THREADPOOL_FOR_DNNL)
|
||||
endif()
|
||||
include(${TOP_DIR}/cmake/external_libs/mkl_dnn.cmake)
|
||||
endif()
|
||||
|
||||
if(MSLITE_DEPS_LIBEVENT)
|
||||
|
@ -47,6 +53,7 @@ if(MSLITE_DEPS_PYBIND11)
|
|||
include_directories(${Python3_NumPy_INCLUDE_DIRS})
|
||||
include_directories(${TOP_DIR})
|
||||
include_directories(${CORE_DIR})
|
||||
set(PYBIND11_CPP_STANDARD -std=c++17)
|
||||
include(${TOP_DIR}/cmake/external_libs/pybind11.cmake)
|
||||
endif()
|
||||
endif()
|
||||
|
|
|
@ -421,9 +421,13 @@ add_subdirectory(runtime/kernel/cpu)
|
|||
add_library(lite_src_mid OBJECT ${LITE_SRC})
|
||||
add_dependencies(lite_src_mid fbs_src)
|
||||
|
||||
if(MSLITE_ENABLE_ACL AND NOT MSLITE_ENABLE_CLOUD_FUSION_INFERENCE)
|
||||
if(MSLITE_ENABLE_ACL)
|
||||
include_directories(${TOP_DIR}/graphengine/inc/external)
|
||||
add_subdirectory(runtime/kernel/ascend)
|
||||
if(NOT MSLITE_ENABLE_CLOUD_FUSION_INFERENCE)
|
||||
add_subdirectory(runtime/kernel/ascend)
|
||||
else()
|
||||
add_compile_definitions(ENABLE_CLOUD_FUSION_INFERENCE)
|
||||
endif()
|
||||
link_directories(${ASCEND_CANN_RUNTIME_PATH} ${ASCEND_TOOLKIT_RUNTIME_PATH})
|
||||
endif()
|
||||
|
||||
|
|
|
@ -13,6 +13,8 @@ if(MSLITE_ENABLE_CLOUD_FUSION_INFERENCE)
|
|||
add_compile_definitions(USE_GLOG)
|
||||
string(REPLACE "-fno-rtti" "" CMAKE_C_FLAGS ${CMAKE_C_FLAGS})
|
||||
string(REPLACE "-fno-rtti" "" CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS})
|
||||
string(REPLACE "-fno-exceptions" "" CMAKE_C_FLAGS ${CMAKE_C_FLAGS})
|
||||
string(REPLACE "-fno-exceptions" "" CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS})
|
||||
add_compile_definitions(ENABLE_CLOUD_FUSION_INFERENCE)
|
||||
remove_definitions(-DBUILD_LITE_INFERENCE)
|
||||
set(MINDIR_MODEL_SRC
|
||||
|
@ -165,7 +167,9 @@ if(MSLITE_ENABLE_CLOUD_FUSION_INFERENCE)
|
|||
|
||||
if(MSLITE_ENABLE_ACL)
|
||||
include_directories(${TOP_DIR}/graphengine/inc/external)
|
||||
add_subdirectory(kernel/ascend)
|
||||
link_directories(${ASCEND_CANN_RUNTIME_PATH} ${ASCEND_TOOLKIT_RUNTIME_PATH})
|
||||
target_link_libraries(mindspore-extendrt ascend_kernel_mid)
|
||||
endif()
|
||||
|
||||
if(SUPPORT_CUDA)
|
||||
|
|
|
@ -0,0 +1,26 @@
|
|||
include_directories(${TOP_DIR}/graphengine/inc/external)
|
||||
include_directories(${TOP_DIR}/mindspore)
|
||||
include_directories(${TOP_DIR}/mindspore/lite/src)
|
||||
|
||||
find_library(ge_graph libgraph.so ${ASCEND_CANN_RUNTIME_PATH} ${ASCEND_TOOLKIT_RUNTIME_PATH})
|
||||
|
||||
file(GLOB_RECURSE ASCEND_SRC ${CMAKE_CURRENT_SOURCE_DIR}
|
||||
"custom_ascend_kernel.cc"
|
||||
"model/*.cc"
|
||||
)
|
||||
|
||||
add_library(ascend_kernel_mid OBJECT ${ASCEND_SRC})
|
||||
|
||||
add_dependencies(ascend_kernel_mid fbs_inner_src)
|
||||
if("${MSLITE_REGISTRY_DEVICE}" STREQUAL "SD3403" AND PLATFORM_ARM64)
|
||||
find_library(ge_graph libgraph.so ${ASCEND_CANN_RUNTIME_PATH} ${ASCEND_TOOLKIT_RUNTIME_PATH})
|
||||
find_library(acl libascendcl.so ${ASCEND_CANN_RUNTIME_PATH} ${ASCEND_TOOLKIT_RUNTIME_PATH})
|
||||
find_library(acl_retr libacl_retr.so ${ASCEND_CANN_RUNTIME_PATH} ${ASCEND_TOOLKIT_RUNTIME_PATH})
|
||||
find_library(acl_cblas libacl_cblas.so ${ASCEND_CANN_RUNTIME_PATH} ${ASCEND_TOOLKIT_RUNTIME_PATH})
|
||||
find_library(acl_runtime libruntime.so ${ASCEND_CANN_RUNTIME_PATH} ${ASCEND_TOOLKIT_RUNTIME_PATH})
|
||||
target_link_libraries(ascend_kernel_mid ${ge_graph} ${acl} ${acl_retr} ${acl_cblas} ${acl_runtime})
|
||||
else()
|
||||
target_link_libraries(ascend_kernel_mid ${ge_graph} ${ge_compiler}
|
||||
${acl_retr} ${acl_cblas} ${acl_dvpp} ${acl_runtime} ${libplatform}
|
||||
${libcompress} ${libopskernel} ${libaicore_utils} ${libaicpu_engine_common} ${acl})
|
||||
endif()
|
|
@ -0,0 +1,242 @@
|
|||
/**
|
||||
* Copyright 2021 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "extendrt/kernel/ascend/custom_ascend_kernel.h"
|
||||
#include <utility>
|
||||
#include "include/registry/register_kernel.h"
|
||||
#include "include/api/types.h"
|
||||
#include "include/api/data_type.h"
|
||||
#include "extendrt/kernel/ascend/model/model_infer.h"
|
||||
#include "extendrt/kernel/ascend/options/acl_options_parser.h"
|
||||
#include "core/ops/custom.h"
|
||||
#include "plugin/factory/ms_factory.h"
|
||||
#include "src/common/log_util.h"
|
||||
#include "common/log_adapter.h"
|
||||
|
||||
namespace mindspore::kernel {
|
||||
namespace acl {
|
||||
CustomAscendKernelMod::CustomAscendKernelMod()
|
||||
: load_model_(false), acl_options_(nullptr), dyn_shape_proc_(nullptr), model_infer_(nullptr), input_data_idx_(0) {}
|
||||
|
||||
CustomAscendKernelMod::~CustomAscendKernelMod() {
|
||||
if (load_model_) {
|
||||
int ret = model_infer_->Finalize();
|
||||
if (ret != lite::RET_OK) {
|
||||
MS_LOG(ERROR) << "Model finalize failed.";
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void CustomAscendKernelMod::RecordInputDataIndex(const std::vector<KernelTensorPtr> &inputs) {
|
||||
for (size_t idx = 0; idx < inputs.size(); ++idx) {
|
||||
if (inputs[idx] == nullptr) {
|
||||
MS_LOG(ERROR) << "Input " << idx << " is invalid.";
|
||||
return;
|
||||
}
|
||||
if (inputs[idx]->GetData() == nullptr) {
|
||||
input_data_idx_ = idx;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
bool CustomAscendKernelMod::InitParam(const std::vector<KernelTensorPtr> &inputs,
|
||||
const std::vector<KernelTensorPtr> &outputs) {
|
||||
if (inputs.empty() || outputs.empty()) {
|
||||
MS_LOG(ERROR) << "Custom kernel has empty inputs or outputs, which is invalid.";
|
||||
return false;
|
||||
}
|
||||
inputs_.assign(inputs.begin(), inputs.end() - 1);
|
||||
outputs_.assign(outputs.begin(), outputs.end());
|
||||
acl_options_ = std::make_shared<AclModelOptions>();
|
||||
if (acl_options_ == nullptr) {
|
||||
MS_LOG(ERROR) << "Create AclModelOptions failed.";
|
||||
return false;
|
||||
}
|
||||
// AclOptionsParser parser;
|
||||
// if (parser.ParseAclOptions(context_, &acl_options_) != lite::RET_OK) {
|
||||
// MS_LOG(ERROR) << "Parse model options failed.";
|
||||
// return false;
|
||||
// }
|
||||
// last input is om data tensor
|
||||
int idx = inputs.size() - 1;
|
||||
if (inputs[idx] == nullptr || inputs[idx]->GetData() == nullptr) {
|
||||
MS_LOG(ERROR) << "Input " << idx << " is invalid.";
|
||||
return false;
|
||||
}
|
||||
Buffer om_data(inputs[idx]->GetData()->addr, inputs[idx]->GetData()->size);
|
||||
model_infer_ = std::make_shared<ModelInfer>(om_data, acl_options_);
|
||||
if (model_infer_ == nullptr) {
|
||||
MS_LOG(ERROR) << "Create ModelInfer failed.";
|
||||
return false;
|
||||
}
|
||||
RecordInputDataIndex(inputs);
|
||||
dyn_shape_proc_ = std::make_shared<DynShapeProcess>(acl_options_, input_data_idx_);
|
||||
if (dyn_shape_proc_ == nullptr) {
|
||||
MS_LOG(ERROR) << "Create DynShapeProcess failed.";
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool CustomAscendKernelMod::Init(const BaseOperatorPtr &base_operator, const std::vector<KernelTensorPtr> &inputs,
|
||||
const std::vector<KernelTensorPtr> &outputs) {
|
||||
if (load_model_) {
|
||||
MS_LOG(INFO) << "Om has been loaded in custom kernel.";
|
||||
return lite::RET_OK;
|
||||
}
|
||||
|
||||
auto kernel_ptr = std::dynamic_pointer_cast<ops::Custom>(base_operator);
|
||||
if (!kernel_ptr) {
|
||||
MS_LOG(ERROR) << "Cast Custom ops failed!";
|
||||
return false;
|
||||
}
|
||||
if (!InitParam(inputs, outputs)) {
|
||||
MS_LOG(ERROR) << "Init param failed.";
|
||||
return false;
|
||||
}
|
||||
if (LoadModel() != lite::RET_OK) {
|
||||
MS_LOG(ERROR) << "Load model failed.";
|
||||
return false;
|
||||
}
|
||||
|
||||
load_model_ = true;
|
||||
return true;
|
||||
}
|
||||
|
||||
int CustomAscendKernelMod::LoadModel() {
|
||||
int ret = model_infer_->Init();
|
||||
if (ret != lite::RET_OK) {
|
||||
MS_LOG(ERROR) << "Model infer init failed.";
|
||||
return lite::RET_ERROR;
|
||||
}
|
||||
ret = model_infer_->Load();
|
||||
if (ret != lite::RET_OK) {
|
||||
MS_LOG(ERROR) << "Load om data failed.";
|
||||
return lite::RET_ERROR;
|
||||
}
|
||||
acl_options_->batch_size = model_infer_->GetDynamicBatch();
|
||||
acl_options_->image_size = model_infer_->GetDynamicImage();
|
||||
|
||||
MS_LOG(INFO) << "Load om data success.";
|
||||
return lite::RET_OK;
|
||||
}
|
||||
|
||||
int CustomAscendKernelMod::Resize(const BaseOperatorPtr &base_operator, const std::vector<KernelTensorPtr> &inputs,
|
||||
const std::vector<KernelTensorPtr> &outputs,
|
||||
const std::map<uint32_t, tensor::TensorPtr> &inputsOnHost) {
|
||||
if (!load_model_) {
|
||||
MS_LOG(WARNING) << "Model has not been loaded, start to load when resize.";
|
||||
if (!Init(base_operator, inputs, outputs)) {
|
||||
MS_LOG(ERROR) << "Load model failed when resize.";
|
||||
return lite::RET_ERROR;
|
||||
}
|
||||
}
|
||||
return lite::RET_OK;
|
||||
}
|
||||
|
||||
int CustomAscendKernelMod::SetInputAndOutputAddr(const std::vector<AddressPtr> &inputs,
|
||||
const std::vector<AddressPtr> &outputs) {
|
||||
if ((inputs_.size() + 1) != inputs.size()) {
|
||||
MS_LOG(ERROR) << "Size of inputs in init [" << (inputs_.size() + 1) << "] and "
|
||||
<< "size of inputs in launch [" << inputs.size() << "] are not equal.";
|
||||
return lite::RET_ERROR;
|
||||
}
|
||||
if (outputs_.size() != outputs.size()) {
|
||||
MS_LOG(ERROR) << "Size of outputs in init (" << outputs_.size() << ") and "
|
||||
<< "size of outputs in launch (" << outputs.size() << ") are not equal.";
|
||||
return lite::RET_ERROR;
|
||||
}
|
||||
for (size_t i = 0; i < inputs_.size(); ++i) {
|
||||
if (inputs[i]->addr == nullptr || inputs[i]->size == 0) {
|
||||
MS_LOG(ERROR) << "Input " << i << " addr is invalid.";
|
||||
return lite::RET_ERROR;
|
||||
}
|
||||
inputs_[i]->SetData(inputs[i]);
|
||||
}
|
||||
for (size_t j = 0; j < outputs_.size(); ++j) {
|
||||
if (outputs[j]->addr == nullptr || inputs[j]->size == 0) {
|
||||
MS_LOG(ERROR) << "Output " << j << " addr is invalid.";
|
||||
return lite::RET_ERROR;
|
||||
}
|
||||
outputs_[j]->SetData(outputs[j]);
|
||||
}
|
||||
return lite::RET_OK;
|
||||
}
|
||||
|
||||
bool CustomAscendKernelMod::Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
|
||||
const std::vector<AddressPtr> &outputs, void *stream_ptr) {
|
||||
if (!load_model_) {
|
||||
MS_LOG(ERROR) << "Init custom ascend kernel has been not ready.";
|
||||
return false;
|
||||
}
|
||||
if (SetInputAndOutputAddr(inputs, outputs) != lite::RET_OK) {
|
||||
MS_LOG(ERROR) << "Check input and output param failed.";
|
||||
return false;
|
||||
}
|
||||
if (dyn_shape_proc_->ProcDynamicInput(&inputs_) != lite::RET_OK) {
|
||||
MS_LOG(ERROR) << "Proc dynamic batch size input failed.";
|
||||
return false;
|
||||
}
|
||||
if (model_infer_->Inference(inputs_, outputs_) != lite::RET_OK) {
|
||||
MS_LOG(ERROR) << "Custom kernel execute failed.";
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
// std::shared_ptr<kernel::Kernel> CustomCreateKernel(const std::vector<mindspore::MSTensor> &inputs,
|
||||
// const std::vector<mindspore::MSTensor> &outputs,
|
||||
// const schema::Primitive *primitive, const mindspore::Context *ctx)
|
||||
// {
|
||||
// if (primitive == nullptr) {
|
||||
// MS_LOG(ERROR) << "Primitive is nullptr.";
|
||||
// return nullptr;
|
||||
// }
|
||||
// if (primitive->value_type() != schema::PrimitiveType_Custom) {
|
||||
// MS_LOG(ERROR) << "Primitive type is not PrimitiveType_Custom";
|
||||
// return nullptr;
|
||||
// }
|
||||
//
|
||||
// auto kernel = std::make_shared<CustomAscendKernel>(inputs, outputs, primitive, ctx);
|
||||
// if (kernel == nullptr) {
|
||||
// MS_LOG(ERROR) << "New custom kernel is nullptr";
|
||||
// return nullptr;
|
||||
// }
|
||||
// return kernel;
|
||||
// }
|
||||
|
||||
MS_KERNEL_FACTORY_REG(KernelMod, CustomAscend, CustomAscendKernelMod);
|
||||
} // namespace acl
|
||||
} // namespace mindspore::kernel
|
||||
namespace mindspore {
|
||||
namespace registry {
|
||||
namespace {
|
||||
const auto kFloat32 = DataType::kNumberTypeFloat32;
|
||||
const auto kFloat16 = DataType::kNumberTypeFloat16;
|
||||
const auto kInt32 = DataType::kNumberTypeInt32;
|
||||
const auto kInt8 = DataType::kNumberTypeInt8;
|
||||
const auto kUInt8 = DataType::kNumberTypeUInt8;
|
||||
const auto kBool = DataType::kNumberTypeBool;
|
||||
} // namespace
|
||||
// REGISTER_CUSTOM_KERNEL(ASCEND, ACL, kFloat32, ACL, kernel::acl::CustomCreateKernel)
|
||||
// REGISTER_CUSTOM_KERNEL(ASCEND, ACL, kFloat16, ACL, kernel::acl::CustomCreateKernel)
|
||||
// REGISTER_CUSTOM_KERNEL(ASCEND, ACL, kInt32, ACL, kernel::acl::CustomCreateKernel)
|
||||
// REGISTER_CUSTOM_KERNEL(ASCEND, ACL, kInt8, ACL, kernel::acl::CustomCreateKernel)
|
||||
// REGISTER_CUSTOM_KERNEL(ASCEND, ACL, kUInt8, ACL, kernel::acl::CustomCreateKernel)
|
||||
// REGISTER_CUSTOM_KERNEL(ASCEND, ACL, kBool, ACL, kernel::acl::CustomCreateKernel)
|
||||
} // namespace registry
|
||||
} // namespace mindspore
|
|
@ -0,0 +1,68 @@
|
|||
/**
|
||||
* Copyright 2021 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef MINDSPORE_LITE_SRC_EXTENDRT_KERNEL_ASCEND_CUSTOM_ASCEND_KERNEL_H_
|
||||
#define MINDSPORE_LITE_SRC_EXTENDRT_KERNEL_ASCEND_CUSTOM_ASCEND_KERNEL_H_
|
||||
|
||||
#include <vector>
|
||||
#include <string>
|
||||
#include <memory>
|
||||
#include <map>
|
||||
#include "extendrt/kernel/ascend/options/acl_model_options.h"
|
||||
#include "extendrt/kernel/ascend/model/model_infer.h"
|
||||
#include "extendrt/kernel/ascend/model/dyn_shape_process.h"
|
||||
#include "include/api/types.h"
|
||||
#include "include/api/context.h"
|
||||
#include "kernel/kernel.h"
|
||||
#include "kernel/common_utils.h"
|
||||
#include "include/errorcode.h"
|
||||
|
||||
namespace mindspore::kernel {
|
||||
namespace acl {
|
||||
class CustomAscendKernelMod : public kernel::KernelMod {
|
||||
public:
|
||||
CustomAscendKernelMod();
|
||||
~CustomAscendKernelMod() override;
|
||||
|
||||
bool Init(const BaseOperatorPtr &base_operator, const std::vector<KernelTensorPtr> &inputs,
|
||||
const std::vector<KernelTensorPtr> &outputs) override;
|
||||
|
||||
int Resize(
|
||||
const BaseOperatorPtr &base_operator, const std::vector<KernelTensorPtr> &inputs,
|
||||
const std::vector<KernelTensorPtr> &outputs,
|
||||
const std::map<uint32_t, tensor::TensorPtr> &inputsOnHost = std::map<uint32_t, tensor::TensorPtr>()) override;
|
||||
|
||||
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
|
||||
const std::vector<AddressPtr> &outputs, void *stream_ptr) override;
|
||||
|
||||
private:
|
||||
void RecordInputDataIndex(const std::vector<KernelTensorPtr> &inputs);
|
||||
bool InitParam(const std::vector<KernelTensorPtr> &inputs, const std::vector<KernelTensorPtr> &outputs);
|
||||
int SetInputAndOutputAddr(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &outputs);
|
||||
int LoadModel();
|
||||
|
||||
bool load_model_;
|
||||
std::vector<KernelTensorPtr> inputs_;
|
||||
std::vector<KernelTensorPtr> outputs_;
|
||||
AclModelOptionsPtr acl_options_;
|
||||
DynShapeProcPtr dyn_shape_proc_;
|
||||
ModelInferPtr model_infer_;
|
||||
size_t input_data_idx_;
|
||||
};
|
||||
} // namespace acl
|
||||
} // namespace mindspore::kernel
|
||||
|
||||
#endif // MINDSPORE_LITE_SRC_EXTENDRT_KERNEL_ASCEND_CUSTOM_ASCEND_KERNEL_H_
|
|
@ -0,0 +1,60 @@
|
|||
/**
|
||||
* Copyright 2021 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "extendrt/kernel/ascend/model/acl_env_guard.h"
|
||||
#include "common/log_adapter.h"
|
||||
#include "acl/acl.h"
|
||||
|
||||
namespace mindspore::kernel {
|
||||
namespace acl {
|
||||
std::shared_ptr<AclEnvGuard> AclEnvGuard::global_acl_env_ = nullptr;
|
||||
std::mutex AclEnvGuard::global_acl_env_mutex_;
|
||||
|
||||
AclEnvGuard::AclEnvGuard(std::string_view cfg_file) {
|
||||
errno_ = aclInit(cfg_file.data());
|
||||
if (errno_ != ACL_ERROR_NONE && errno_ != ACL_ERROR_REPEAT_INITIALIZE) {
|
||||
MS_LOG(ERROR) << "Execute aclInit Failed";
|
||||
return;
|
||||
}
|
||||
MS_LOG(INFO) << "Acl init success";
|
||||
}
|
||||
|
||||
AclEnvGuard::~AclEnvGuard() { (void)aclFinalize(); }
|
||||
|
||||
std::shared_ptr<AclEnvGuard> AclEnvGuard::GetAclEnv(std::string_view cfg_file) {
|
||||
std::shared_ptr<AclEnvGuard> acl_env;
|
||||
|
||||
std::lock_guard<std::mutex> lock(global_acl_env_mutex_);
|
||||
acl_env = global_acl_env_;
|
||||
if (acl_env != nullptr) {
|
||||
MS_LOG(INFO) << "Acl has been initialized, skip.";
|
||||
if (!cfg_file.empty()) {
|
||||
MS_LOG(WARNING) << "Dump config file option " << cfg_file << " is ignored.";
|
||||
}
|
||||
} else {
|
||||
acl_env = std::make_shared<AclEnvGuard>(cfg_file);
|
||||
aclError ret = acl_env->GetErrno();
|
||||
if (ret != ACL_ERROR_NONE && ret != ACL_ERROR_REPEAT_INITIALIZE) {
|
||||
MS_LOG(ERROR) << "Execute aclInit Failed";
|
||||
return nullptr;
|
||||
}
|
||||
global_acl_env_ = acl_env;
|
||||
MS_LOG(INFO) << "Acl init success";
|
||||
}
|
||||
return acl_env;
|
||||
}
|
||||
} // namespace acl
|
||||
} // namespace mindspore::kernel
|
|
@ -0,0 +1,42 @@
|
|||
/**
|
||||
* Copyright 2021 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef MINDSPORE_LITE_SRC_EXTENDRT_KERNEL_ASCEND_MODEL_ACL_ENV_GUARD_H_
|
||||
#define MINDSPORE_LITE_SRC_EXTENDRT_KERNEL_ASCEND_MODEL_ACL_ENV_GUARD_H_
|
||||
|
||||
#include <memory>
|
||||
#include <mutex>
|
||||
#include "acl/acl_base.h"
|
||||
|
||||
namespace mindspore::kernel {
|
||||
namespace acl {
|
||||
class AclEnvGuard {
|
||||
public:
|
||||
explicit AclEnvGuard(std::string_view cfg_file);
|
||||
~AclEnvGuard();
|
||||
aclError GetErrno() const { return errno_; }
|
||||
static std::shared_ptr<AclEnvGuard> GetAclEnv(std::string_view cfg_file);
|
||||
|
||||
private:
|
||||
static std::shared_ptr<AclEnvGuard> global_acl_env_;
|
||||
static std::mutex global_acl_env_mutex_;
|
||||
|
||||
aclError errno_;
|
||||
};
|
||||
} // namespace acl
|
||||
} // namespace mindspore::kernel
|
||||
|
||||
#endif // MINDSPORE_LITE_SRC_EXTENDRT_KERNEL_ASCEND_MODEL_ACL_ENV_GUARD_H_
|
|
@ -0,0 +1,179 @@
|
|||
/**
|
||||
* Copyright 2021-2022 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "extendrt/kernel/ascend/model/dyn_shape_process.h"
|
||||
#include <utility>
|
||||
#include "mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/op_base.h"
|
||||
#include "include/errorcode.h"
|
||||
|
||||
namespace mindspore::kernel {
|
||||
namespace acl {
|
||||
namespace {
|
||||
constexpr auto kInputDimNum = 4;
|
||||
constexpr auto kNHWCHeightIdx = 1;
|
||||
constexpr auto kNHWCWidthIdx = 2;
|
||||
constexpr auto kNCHWHeightIdx = 2;
|
||||
constexpr auto kNCHWWidthIdx = 3;
|
||||
constexpr auto kImageSizeHwNum = 2;
|
||||
} // namespace
|
||||
|
||||
int DynShapeProcess::ProcDynamicInput(std::vector<KernelTensorPtr> *const inputs) {
|
||||
MS_CHECK_TRUE_MSG(acl_options_ != nullptr, lite::RET_ERROR, "Acl options ptr is nullptr.");
|
||||
if (acl_options_->batch_size.empty() && acl_options_->image_size.empty()) {
|
||||
MS_LOG(INFO) << "Inputs are not dynamic mode.";
|
||||
return lite::RET_OK;
|
||||
}
|
||||
if (!acl_options_->batch_size.empty() && !acl_options_->image_size.empty()) {
|
||||
MS_LOG(ERROR) << "Batch size and image size can't be set at the same time.";
|
||||
return lite::RET_ERROR;
|
||||
}
|
||||
MS_CHECK_TRUE_MSG(inputs != nullptr, lite::RET_ERROR, "Inputs is nullptr.");
|
||||
if (!acl_options_->batch_size.empty()) {
|
||||
if (AddBatchSizeInput(inputs) != lite::RET_OK) {
|
||||
MS_LOG(ERROR) << "Add batch size input failed.";
|
||||
return lite::RET_ERROR;
|
||||
}
|
||||
}
|
||||
if (!acl_options_->image_size.empty()) {
|
||||
if (AddImageSizeInput(inputs) != lite::RET_OK) {
|
||||
MS_LOG(ERROR) << "Add Image size input failed.";
|
||||
return lite::RET_ERROR;
|
||||
}
|
||||
}
|
||||
return lite::RET_OK;
|
||||
}
|
||||
|
||||
int DynShapeProcess::AddBatchSizeInput(std::vector<KernelTensorPtr> *const inputs) {
|
||||
int32_t *batch_size_addr = reinterpret_cast<int32_t *>(malloc(sizeof(int32_t)));
|
||||
if (batch_size_addr == nullptr) {
|
||||
MS_LOG(ERROR) << "Malloc batch size failed.";
|
||||
return lite::RET_ERROR;
|
||||
}
|
||||
if (GetRealBatchSize(inputs, batch_size_addr) != lite::RET_OK) {
|
||||
MS_LOG(ERROR) << "Get real batch size failed.";
|
||||
free(batch_size_addr);
|
||||
return lite::RET_ERROR;
|
||||
}
|
||||
auto batch_size_ptr = std::make_shared<Address>(batch_size_addr, sizeof(int32_t));
|
||||
if (batch_size_ptr == nullptr) {
|
||||
MS_LOG(ERROR) << "Create Address failed.";
|
||||
free(batch_size_addr);
|
||||
return lite::RET_ERROR;
|
||||
}
|
||||
auto tensor_ptr = std::make_shared<KernelTensor>();
|
||||
if (tensor_ptr == nullptr) {
|
||||
MS_LOG(ERROR) << "Create KernelTensor failed.";
|
||||
free(batch_size_addr);
|
||||
return lite::RET_ERROR;
|
||||
}
|
||||
|
||||
tensor_ptr->SetData(batch_size_ptr);
|
||||
inputs->emplace_back(tensor_ptr);
|
||||
return lite::RET_OK;
|
||||
}
|
||||
|
||||
int DynShapeProcess::AddImageSizeInput(std::vector<KernelTensorPtr> *const inputs) {
|
||||
int32_t *image_size_addr = reinterpret_cast<int32_t *>(malloc(kImageSizeHwNum * sizeof(int32_t)));
|
||||
if (image_size_addr == nullptr) {
|
||||
MS_LOG(ERROR) << "Malloc image size failed.";
|
||||
return lite::RET_ERROR;
|
||||
}
|
||||
if (GetRealImageSize(inputs, image_size_addr, kImageSizeHwNum) != lite::RET_OK) {
|
||||
MS_LOG(ERROR) << "Get real image size failed.";
|
||||
free(image_size_addr);
|
||||
return lite::RET_ERROR;
|
||||
}
|
||||
auto image_size_ptr = std::make_shared<Address>(image_size_addr, kImageSizeHwNum * sizeof(int32_t));
|
||||
if (image_size_ptr == nullptr) {
|
||||
MS_LOG(ERROR) << "Create Address failed.";
|
||||
free(image_size_addr);
|
||||
return lite::RET_ERROR;
|
||||
}
|
||||
auto tensor_ptr = std::make_shared<KernelTensor>();
|
||||
if (tensor_ptr == nullptr) {
|
||||
MS_LOG(ERROR) << "Create KernelTensor failed.";
|
||||
free(image_size_addr);
|
||||
return lite::RET_ERROR;
|
||||
}
|
||||
|
||||
tensor_ptr->SetData(image_size_ptr);
|
||||
inputs->emplace_back(tensor_ptr);
|
||||
return lite::RET_OK;
|
||||
}
|
||||
|
||||
int DynShapeProcess::GetRealBatchSize(std::vector<KernelTensorPtr> *const inputs, int32_t *batch_size) {
|
||||
MS_CHECK_TRUE_MSG(batch_size != nullptr, lite::RET_ERROR, "Batch size ptr is nullptr.");
|
||||
if (input_data_idx_ >= inputs->size()) {
|
||||
MS_LOG(ERROR) << " Input data index " << input_data_idx_ << " is larger than input size " << inputs->size();
|
||||
return lite::RET_ERROR;
|
||||
}
|
||||
auto tensor = (*inputs)[input_data_idx_];
|
||||
std::vector<int64_t> shape = tensor->GetShapeVector();
|
||||
if (shape.empty()) {
|
||||
MS_LOG(ERROR) << "Shape is empty, input index = " << input_data_idx_;
|
||||
return lite::RET_ERROR;
|
||||
}
|
||||
int32_t cur_batch_size = static_cast<uint64_t>(shape[0]);
|
||||
auto iter = acl_options_->batch_size.find(cur_batch_size);
|
||||
if (iter == acl_options_->batch_size.end()) {
|
||||
MS_LOG(ERROR) << "Current batch size " << cur_batch_size << " is invalid, please check device info of context";
|
||||
return lite::RET_ERROR;
|
||||
}
|
||||
*batch_size = cur_batch_size;
|
||||
MS_LOG(DEBUG) << "Current batch size " << cur_batch_size;
|
||||
return lite::RET_OK;
|
||||
}
|
||||
|
||||
int DynShapeProcess::GetRealImageSize(std::vector<KernelTensorPtr> *const inputs, int32_t *image_size, int32_t num) {
|
||||
MS_CHECK_TRUE_MSG(image_size != nullptr, lite::RET_ERROR, "Image size ptr is nullptr.");
|
||||
if (input_data_idx_ >= inputs->size()) {
|
||||
MS_LOG(ERROR) << "Input data index " << input_data_idx_ << " is larger than input size " << inputs->size();
|
||||
return lite::RET_ERROR;
|
||||
}
|
||||
auto tensor = (*inputs)[input_data_idx_];
|
||||
std::vector<int64_t> shape = tensor->GetShapeVector();
|
||||
if (shape.size() != kInputDimNum) {
|
||||
MS_LOG(ERROR) << "Shape size " << shape.size() << " is invalid, input index = " << input_data_idx_;
|
||||
return lite::RET_ERROR;
|
||||
}
|
||||
auto format = tensor->GetFormat();
|
||||
uint64_t height;
|
||||
uint64_t width;
|
||||
if (format == mindspore::Format::NHWC) {
|
||||
height = shape[kNHWCHeightIdx];
|
||||
width = shape[kNHWCWidthIdx];
|
||||
} else {
|
||||
height = shape[kNCHWHeightIdx];
|
||||
width = shape[kNCHWWidthIdx];
|
||||
}
|
||||
auto cur_image_size = std::pair<int32_t, int32_t>(static_cast<uint64_t>(height), static_cast<uint64_t>(width));
|
||||
auto iter = acl_options_->image_size.find(cur_image_size);
|
||||
if (iter == acl_options_->image_size.end()) {
|
||||
MS_LOG(ERROR) << "Image size height " << height << ",weight " << width
|
||||
<< " is invalid, please check device info of context.";
|
||||
return lite::RET_ERROR;
|
||||
}
|
||||
if (num != kImageSizeHwNum) {
|
||||
MS_LOG(ERROR) << "The hw num should be " << kImageSizeHwNum << ",real num " << num;
|
||||
return lite::RET_ERROR;
|
||||
}
|
||||
image_size[0] = height;
|
||||
image_size[1] = width;
|
||||
MS_LOG(DEBUG) << "Current height " << height << " width " << width;
|
||||
return lite::RET_OK;
|
||||
}
|
||||
} // namespace acl
|
||||
} // namespace mindspore::kernel
|
|
@ -0,0 +1,48 @@
|
|||
/**
|
||||
* Copyright 2021-2022 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef MINDSPORE_LITE_SRC_EXTENDRT_KERNEL_ASCEND_MODEL_DYN_SHAPE_PROCESS_H
|
||||
#define MINDSPORE_LITE_SRC_EXTENDRT_KERNEL_ASCEND_MODEL_DYN_SHAPE_PROCESS_H
|
||||
|
||||
#include <vector>
|
||||
#include <memory>
|
||||
#include "extendrt/kernel/ascend/options/acl_model_options.h"
|
||||
#include "kernel/kernel.h"
|
||||
#include "include/api/types.h"
|
||||
|
||||
namespace mindspore::kernel {
|
||||
namespace acl {
|
||||
// Appends the extra "dynamic dimension" input (real batch size or image H/W)
// that an ACL offline model converted with dynamic shape support expects at
// execute time.
class DynShapeProcess {
 public:
  // options: shared ACL model options.
  // input_data_idx: index of the data input whose real batch / image size is
  // to be reported to the model.
  explicit DynShapeProcess(const AclModelOptionsPtr &options, size_t input_data_idx)
      : acl_options_(options), input_data_idx_(input_data_idx) {}

  // Entry point: augments *inputs with the dynamic-dimension tensor when the
  // model requires one. Returns a lite error code.
  int ProcDynamicInput(std::vector<KernelTensorPtr> *const inputs);

 private:
  // Appends the dynamic batch-size input to *inputs.
  int AddBatchSizeInput(std::vector<KernelTensorPtr> *const inputs);
  // Appends the dynamic image-size (H/W) input to *inputs.
  int AddImageSizeInput(std::vector<KernelTensorPtr> *const inputs);
  // Queries the real batch size from *inputs into *batch_size.
  int GetRealBatchSize(std::vector<KernelTensorPtr> *const inputs, int32_t *batch_size);
  // Queries num real image-size values from *inputs into image_size.
  int GetRealImageSize(std::vector<KernelTensorPtr> *const inputs, int32_t *image_size, int32_t num);

  AclModelOptionsPtr acl_options_;
  size_t input_data_idx_;  // index of the data input used to derive sizes
};

using DynShapeProcPtr = std::shared_ptr<DynShapeProcess>;
|
||||
} // namespace acl
|
||||
} // namespace mindspore::kernel
|
||||
#endif // MINDSPORE_LITE_SRC_EXTENDRT_KERNEL_ASCEND_MODEL_DYN_SHAPE_PROCESS_H
|
|
@ -0,0 +1,170 @@
|
|||
/**
|
||||
* Copyright 2021 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "extendrt/kernel/ascend/model/model_infer.h"
|
||||
#include "common/log_adapter.h"
|
||||
#include "acl/acl.h"
|
||||
|
||||
namespace mindspore::kernel {
|
||||
namespace acl {
|
||||
// Stores the serialized OM model and its options; actual ACL resource
// acquisition is deferred to Init() / Load().
ModelInfer::ModelInfer(const Buffer &om_data, const AclModelOptionsPtr &options)
    : init_flag_(false),
      load_flag_(false),
      device_type_("AscendCL"),
      context_(nullptr),
      om_data_(om_data),
      options_(options),
      model_process_(options),
      acl_env_(nullptr) {}
|
||||
|
||||
// Acquires the ACL runtime resources for this model: the process-wide ACL
// env, the device, and a context; also detects whether we run directly on
// device. Idempotent: a second call is a no-op.
// Fix: failure paths now roll back partially acquired resources (device /
// context). Previously they leaked, because Finalize() refuses to run while
// init_flag_ is still false.
STATUS ModelInfer::Init() {
  if (init_flag_) {
    MS_LOG(INFO) << "Acl has been initialized, skip.";
    return lite::RET_OK;
  }
  if (options_ == nullptr) {
    MS_LOG(ERROR) << "Acl options is nullptr.";
    return lite::RET_ERROR;
  }
  acl_env_ = AclEnvGuard::GetAclEnv(options_->dump_cfg_path);
  if (acl_env_ == nullptr) {
    MS_LOG(ERROR) << "Acl init failed.";
    return lite::RET_ERROR;
  }
  int32_t device_id = options_->device_id;
  aclError ret = aclrtSetDevice(device_id);
  if (ret != ACL_ERROR_NONE) {
    MS_LOG(ERROR) << "Acl open device " << device_id << " failed.";
    return lite::RET_ERROR;
  }
  MS_LOG(INFO) << "Open device " << device_id << " success.";

  ret = aclrtCreateContext(&context_, device_id);
  if (ret != ACL_ERROR_NONE) {
    MS_LOG(ERROR) << "Acl create context failed.";
    // Release the device opened above; Finalize() will not run for us.
    (void)aclrtResetDevice(device_id);
    return lite::RET_ERROR;
  }
  MS_LOG(INFO) << "Create context success.";

  aclrtRunMode run_mode;
  ret = aclrtGetRunMode(&run_mode);
  if (ret != ACL_ERROR_NONE) {
    MS_LOG(ERROR) << "Acl get run mode failed.";
    // Roll back the partially acquired context and device.
    (void)aclrtDestroyContext(context_);
    context_ = nullptr;
    (void)aclrtResetDevice(device_id);
    return lite::RET_ERROR;
  }
  // ACL_DEVICE means host buffers are directly visible to the model, so
  // ModelProcess can skip host<->device copies.
  bool is_device = (run_mode == ACL_DEVICE);
  model_process_.SetIsDevice(is_device);
  MS_LOG(INFO) << "Get run mode success is device input/output " << is_device;

  MS_LOG(INFO) << "Init model success, device id " << device_id;
  init_flag_ = true;
  return lite::RET_OK;
}
|
||||
|
||||
// Releases everything acquired by Init()/Load(): unloads the model, destroys
// the context and resets the device. Safe to call when Init() never
// succeeded (logs a warning and returns OK).
STATUS ModelInfer::Finalize() {
  if (!init_flag_) {
    MS_LOG(WARNING) << "Init is not ok, no need to finalize.";
    return lite::RET_OK;
  }

  // Bind our context to this thread before touching model/device resources.
  aclError rt_ret = aclrtSetCurrentContext(context_);
  if (rt_ret != ACL_ERROR_NONE) {
    MS_LOG(ERROR) << "Set the ascend device context failed.";
    return lite::RET_ERROR;
  }
  if (load_flag_) {
    auto ret = model_process_.UnLoad();
    if (ret != lite::RET_OK) {
      MS_LOG(ERROR) << "Unload model inner failed.";
      return ret;
    }
  }
  if (context_ != nullptr) {
    rt_ret = aclrtDestroyContext(context_);
    if (rt_ret != ACL_ERROR_NONE) {
      // Best effort: keep going so the device below is still reset.
      MS_LOG(ERROR) << "Destroy context failed.";
    }
    context_ = nullptr;
  }
  MS_LOG(INFO) << "End to destroy context.";

  // Device reset failure is logged but does not abort finalization.
  rt_ret = aclrtResetDevice(options_->device_id);
  if (rt_ret != ACL_ERROR_NONE) {
    MS_LOG(ERROR) << "Reset device " << options_->device_id << " failed.";
  }
  MS_LOG(INFO) << "End to reset device " << options_->device_id;
  init_flag_ = false;
  load_flag_ = false;
  return lite::RET_OK;
}
|
||||
|
||||
// Lazily loads the OM model on first use, then binds the ACL context to the
// calling thread so subsequent ACL calls target the right device.
STATUS ModelInfer::Load() {
  if (!load_flag_) {
    auto load_ret = LoadAclModel(om_data_);
    if (load_ret != lite::RET_OK) {
      MS_LOG(ERROR) << "Load model model failed.";
      return load_ret;
    }
    load_flag_ = true;
  }

  auto ctx_ret = aclrtSetCurrentContext(context_);
  if (ctx_ret != ACL_ERROR_NONE) {
    MS_LOG(ERROR) << "Set the ascend device context failed, ret = " << ctx_ret;
    return lite::RET_ERROR;
  }

  return lite::RET_OK;
}
|
||||
|
||||
// Loads the serialized OM model into the runtime and pre-initializes its I/O
// resources. On resource-init failure the freshly loaded model is unloaded
// again so no model id leaks.
// Fix: de-duplicated the garbled "load model model" log messages.
STATUS ModelInfer::LoadAclModel(const Buffer &om_data) {
  MS_LOG(INFO) << "Start to load model.";
  // Load the model from the in-memory OM buffer.
  uint32_t acl_model_id;
  auto acl_ret = aclmdlLoadFromMem(om_data.Data(), om_data.DataSize(), &acl_model_id);
  if (acl_ret != ACL_ERROR_NONE) {
    MS_LOG(ERROR) << "Call aclmdlLoadFromMem failed, ret = " << acl_ret;
    return lite::RET_ERROR;
  }

  // Init per-model resources (model desc, input/output buffers).
  model_process_.set_model_id(acl_model_id);
  int ret = model_process_.PreInitModelResource();
  if (ret != lite::RET_OK) {
    (void)aclmdlUnload(acl_model_id);
    MS_LOG(ERROR) << "Pre init model resource failed.";
    return ret;
  }

  MS_LOG(INFO) << "Load model success.";
  return lite::RET_OK;
}
|
||||
|
||||
// Ensures the model is loaded and the context is current, then delegates the
// actual execution to ModelProcess.
STATUS ModelInfer::Inference(const std::vector<KernelTensorPtr> &inputs, const std::vector<KernelTensorPtr> &outputs) {
  auto prepare_ret = Load();
  if (prepare_ret != lite::RET_OK) {
    MS_LOG(ERROR) << "Prepare model resource failed.";
    return lite::RET_ERROR;
  }
  return model_process_.PredictFromHost(inputs, outputs);
}
|
||||
|
||||
// Delegates to ModelProcess; valid only after the model has been loaded.
std::set<uint64_t> ModelInfer::GetDynamicBatch() { return model_process_.GetDynamicBatch(); }

// need to be called after model load;
std::set<std::pair<uint64_t, uint64_t>> ModelInfer::GetDynamicImage() { return model_process_.GetDynamicImage(); }
|
||||
} // namespace acl
|
||||
} // namespace mindspore::kernel
|
|
@ -0,0 +1,65 @@
|
|||
/**
|
||||
* Copyright 2021 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef MINDSPORE_LITE_SRC_EXTENDRT_KERNEL_ASCEND_MODEL_MODEL_INFER_H_
|
||||
#define MINDSPORE_LITE_SRC_EXTENDRT_KERNEL_ASCEND_MODEL_MODEL_INFER_H_
|
||||
|
||||
#include <vector>
|
||||
#include <memory>
|
||||
#include <set>
|
||||
#include <utility>
|
||||
#include <string>
|
||||
#include "extendrt/kernel/ascend/model/model_process.h"
|
||||
#include "extendrt/kernel/ascend/model/acl_env_guard.h"
|
||||
#include "extendrt/kernel/ascend/options/acl_model_options.h"
|
||||
#include "include/api/types.h"
|
||||
#include "include/errorcode.h"
|
||||
|
||||
namespace mindspore::kernel {
|
||||
namespace acl {
|
||||
using mindspore::lite::STATUS;
|
||||
|
||||
// Wraps the full lifecycle of one ACL offline (OM) model: device/context
// initialization, lazy model load, execution and teardown.
class ModelInfer {
 public:
  ModelInfer(const Buffer &om_data, const AclModelOptionsPtr &options);
  ~ModelInfer() = default;

  STATUS Init();      // acquire device + context (skips if already done)
  STATUS Finalize();  // unload model, destroy context, reset device
  STATUS Load();      // load OM model on first call, bind context to thread
  STATUS Inference(const std::vector<KernelTensorPtr> &inputs, const std::vector<KernelTensorPtr> &outputs);
  // need to be called after model load
  std::set<uint64_t> GetDynamicBatch();
  // need to be called after model load
  std::set<std::pair<uint64_t, uint64_t>> GetDynamicImage();

 private:
  STATUS LoadAclModel(const Buffer &om_data);

  bool init_flag_;           // Init() completed successfully
  bool load_flag_;           // Load() completed successfully
  std::string device_type_;
  aclrtContext context_;
  Buffer om_data_;           // serialized OM model
  AclModelOptionsPtr options_;
  ModelProcess model_process_;
  std::shared_ptr<AclEnvGuard> acl_env_;  // holds the shared ACL env guard
};

using ModelInferPtr = std::shared_ptr<ModelInfer>;
|
||||
} // namespace acl
|
||||
} // namespace mindspore::kernel
|
||||
#endif // MINDSPORE_LITE_SRC_EXTENDRT_KERNEL_ASCEND_MODEL_MODEL_INFER_H_
|
|
@ -0,0 +1,642 @@
|
|||
/**
|
||||
* Copyright 2021 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "extendrt/kernel/ascend/model/model_process.h"
|
||||
#include <sys/time.h>
|
||||
#include <utility>
|
||||
#include <algorithm>
|
||||
#include <map>
|
||||
#include "common/log_adapter.h"
|
||||
#include "src/common/utils.h"
|
||||
#include "src/common/log_util.h"
|
||||
|
||||
namespace mindspore::kernel {
|
||||
namespace acl {
|
||||
namespace {
// Expected element counts of the trailing dynamic-dimension tensor.
constexpr size_t kBatchSizeNum = 1;    // dynamic batch: a single int32 value
constexpr size_t kImageSizeHwNum = 2;  // dynamic image: height and width
}  // namespace
|
||||
// Maps an ACL element type onto the corresponding MindSpore TypeId; unknown
// ACL types map to kNumberTypeEnd.
static TypeId TransToDataType(aclDataType data_type) {
  static const std::map<aclDataType, enum TypeId> kAclToMsType = {
    {ACL_FLOAT16, TypeId::kNumberTypeFloat16}, {ACL_FLOAT, TypeId::kNumberTypeFloat32},
    {ACL_DOUBLE, TypeId::kNumberTypeFloat64}, {ACL_INT8, TypeId::kNumberTypeInt8},
    {ACL_INT16, TypeId::kNumberTypeInt16}, {ACL_INT32, TypeId::kNumberTypeInt32},
    {ACL_INT64, TypeId::kNumberTypeInt64}, {ACL_UINT8, TypeId::kNumberTypeUInt8},
    {ACL_UINT16, TypeId::kNumberTypeUInt16}, {ACL_UINT32, TypeId::kNumberTypeUInt32},
    {ACL_UINT64, TypeId::kNumberTypeUInt64}, {ACL_BOOL, TypeId::kNumberTypeBool},
  };
  const auto iter = kAclToMsType.find(data_type);
  return iter == kAclToMsType.end() ? TypeId::kNumberTypeEnd : iter->second;
}
|
||||
|
||||
// Clears *vec when vec is a valid pointer; a null pointer is a no-op.
template <class T>
inline static void ClearIfNotNull(T *vec) {
  if (vec == nullptr) {
    return;
  }
  vec->clear();
}
|
||||
|
||||
// Appends item to *vec when vec is a valid pointer; a null pointer is a
// no-op. U defaults to std::vector<T>.
template <class T, class U = std::vector<T>>
inline static void PushbackIfNotNull(U *vec, T &&item) {
  if (vec == nullptr) {
    return;
  }
  vec->emplace_back(item);
}
|
||||
|
||||
// Flattens the per-tensor metadata in acl_tensor_list into parallel vectors.
// Any of the output pointers may be null, in which case that field is
// skipped.
// Fix: the final consistency check used to dereference all four pointers
// unconditionally, crashing whenever a caller legitimately passed nullptr.
static STATUS ConstructTensorDesc(const std::vector<AclTensorInfo> &acl_tensor_list, std::vector<std::string> *names,
                                  std::vector<std::vector<int64_t>> *shapes, std::vector<enum TypeId> *data_types,
                                  std::vector<size_t> *mem_sizes) {
  ClearIfNotNull(names);
  ClearIfNotNull(shapes);
  ClearIfNotNull(data_types);
  ClearIfNotNull(mem_sizes);
  for (size_t i = 0; i < acl_tensor_list.size(); ++i) {
    const auto &info = acl_tensor_list[i];
    PushbackIfNotNull(names, info.name);
    PushbackIfNotNull(shapes, info.dims);
    PushbackIfNotNull(data_types, TransToDataType(info.data_type));
    PushbackIfNotNull(mem_sizes, info.buffer_size);
  }

  // Only verify the containers the caller actually asked for.
  const size_t expected = acl_tensor_list.size();
  if ((names != nullptr && names->size() != expected) || (shapes != nullptr && shapes->size() != expected) ||
      (data_types != nullptr && data_types->size() != expected) ||
      (mem_sizes != nullptr && mem_sizes->size() != expected)) {
    MS_LOG(ERROR) << "Inner error, size do not match: names size " << (names == nullptr ? 0 : names->size())
                  << " shapes size " << (shapes == nullptr ? 0 : shapes->size()) << " data types size "
                  << (data_types == nullptr ? 0 : data_types->size()) << " mem sizes size "
                  << (mem_sizes == nullptr ? 0 : mem_sizes->size()) << " acl_tensor_list size "
                  << acl_tensor_list.size();
    return lite::RET_ERROR;
  }

  return lite::RET_OK;
}
|
||||
|
||||
// Renders a shape as "[d0, d1, ...]" for log messages.
static std::string ShapeToString(const std::vector<int64_t> &shape) {
  std::string text("[");
  bool first = true;
  for (const auto dim : shape) {
    if (!first) {
      text += ", ";
    }
    text += std::to_string(dim);
    first = false;
  }
  text += "]";
  return text;
}
|
||||
|
||||
// Builds the model description for model_id_ and prepares the reusable
// input/output buffers. Must be called once right after the model is loaded.
// Fix: aclmdlCreateDesc() result is now null-checked before use.
STATUS ModelProcess::PreInitModelResource() {
  model_desc_ = aclmdlCreateDesc();
  if (model_desc_ == nullptr) {
    MS_LOG(ERROR) << "Create model desc failed.";
    return lite::RET_ERROR;
  }
  aclError acl_ret = aclmdlGetDesc(model_desc_, model_id_);
  if (acl_ret != ACL_ERROR_NONE) {
    MS_LOG(ERROR) << "Read model desc failed, ret = " << acl_ret;
    return lite::RET_ERROR;
  }
  STATUS ret = InitInputsBuffer();
  if (ret != lite::RET_OK) {
    MS_LOG(ERROR) << "Create input buffer failed.";
    return ret;
  }
  ret = InitOutputsBuffer();
  if (ret != lite::RET_OK) {
    MS_LOG(ERROR) << "Create output buffer failed.";
    return ret;
  }
  return lite::RET_OK;
}
|
||||
|
||||
std::set<uint64_t> ModelProcess::GetDynamicBatch() {
|
||||
if (model_desc_ == nullptr) {
|
||||
MS_LOG(ERROR) << " Model desc is nullptr.";
|
||||
return std::set<uint64_t>();
|
||||
}
|
||||
aclmdlBatch dynamic_batch;
|
||||
if (aclmdlGetDynamicBatch(model_desc_, &dynamic_batch) != ACL_SUCCESS) {
|
||||
MS_LOG(ERROR) << "Failed to get dynamic batch.";
|
||||
return std::set<uint64_t>();
|
||||
}
|
||||
size_t batch_count = dynamic_batch.batchCount;
|
||||
if (batch_count > ACL_MAX_BATCH_NUM) {
|
||||
MS_LOG(ERROR) << "Real batch count " << batch_count << " is larger than max " << ACL_MAX_BATCH_NUM;
|
||||
return std::set<uint64_t>();
|
||||
}
|
||||
std::set<uint64_t> batch;
|
||||
for (size_t i = 0; i < dynamic_batch.batchCount; ++i) {
|
||||
batch.insert(dynamic_batch.batch[i]);
|
||||
}
|
||||
return batch;
|
||||
}
|
||||
|
||||
std::set<std::pair<uint64_t, uint64_t>> ModelProcess::GetDynamicImage() {
|
||||
if (model_desc_ == nullptr) {
|
||||
MS_LOG(ERROR) << " Model desc is nullptr.";
|
||||
return std::set<std::pair<uint64_t, uint64_t>>();
|
||||
}
|
||||
aclmdlHW dynamic_hw;
|
||||
if (aclmdlGetDynamicHW(model_desc_, 0, &dynamic_hw) != ACL_SUCCESS) {
|
||||
MS_LOG(ERROR) << "Failed to get dynamic hw.";
|
||||
return std::set<std::pair<uint64_t, uint64_t>>();
|
||||
}
|
||||
size_t hw_count = dynamic_hw.hwCount;
|
||||
if (hw_count > ACL_MAX_HW_NUM) {
|
||||
MS_LOG(ERROR) << "Real hw count " << hw_count << " is larger than max " << ACL_MAX_HW_NUM;
|
||||
return std::set<std::pair<uint64_t, uint64_t>>();
|
||||
}
|
||||
std::set<std::pair<uint64_t, uint64_t>> image;
|
||||
for (size_t i = 0; i < dynamic_hw.hwCount; ++i) {
|
||||
image.insert(std::pair<uint64_t, uint64_t>(dynamic_hw.hw[i][0], dynamic_hw.hw[i][1]));
|
||||
}
|
||||
return image;
|
||||
}
|
||||
|
||||
// Queries every model input from the model description and records its
// metadata in input_infos_. When not running directly on device a staging
// device buffer of the required size is allocated up front and reused for
// every execution.
STATUS ModelProcess::InitInputsBuffer() {
  aclError ret;
  size_t input_size = aclmdlGetNumInputs(model_desc_);
  MS_LOG(INFO) << "input_size = " << input_size;
  for (size_t i = 0; i < input_size; ++i) {
    auto buffer_size = aclmdlGetInputSizeByIndex(model_desc_, i);
    void *data_mem_buffer = nullptr;
    if (!is_run_on_device_) {  // need to copy input/output to/from device
      ret = aclrtMalloc(&data_mem_buffer, buffer_size, ACL_MEM_MALLOC_NORMAL_ONLY);
      if (ret != ACL_ERROR_NONE) {
        MS_LOG(ERROR) << "Malloc device input buffer failed , input size " << buffer_size;
        return lite::RET_ERROR;
      }
    }

    aclmdlIODims dims;
    ret = aclmdlGetInputDims(model_desc_, i, &dims);
    if (ret != ACL_ERROR_NONE) {
      MS_LOG(ERROR) << "Get input shape failed, ret = " << ret;
      if (!is_run_on_device_) {
        // Free only this input's buffer; earlier ones stay tracked in
        // input_infos_ and are reclaimed by DestroyInputsBuffer().
        aclrtFree(data_mem_buffer);
      }
      return lite::RET_ERROR;
    }
    aclDataType data_type = aclmdlGetInputDataType(model_desc_, i);
    std::vector<int64_t> shape(dims.dims, dims.dims + dims.dimCount);
    // NOTE(review): assumes aclmdlGetInputNameByIndex never returns nullptr
    // (std::string construction from a null pointer is UB) — confirm against
    // the CANN API documentation.
    std::string input_name = aclmdlGetInputNameByIndex(model_desc_, i);
    if (input_name.empty()) {
      MS_LOG(WARNING) << "Get name of input " << i << " failed.";
    }
    MS_LOG(INFO) << "Name of input " << i << " is " << input_name;
    // device_data and cur_device_data both start at the freshly allocated
    // buffer (nullptr when running on device).
    input_infos_.emplace_back(
      AclTensorInfo{data_mem_buffer, data_mem_buffer, buffer_size, data_type, shape, input_name});
  }
  MS_LOG(INFO) << "Create model inputs success";
  return lite::RET_OK;
}
|
||||
|
||||
// Allocates a buffer of buffer_size bytes — device memory when host<->device
// copies are needed, host memory when running directly on device — and
// registers it with the given acl dataset. On any failure everything
// allocated inside this call is released.
STATUS ModelProcess::CreateDataBuffer(void **data_mem_buffer, size_t buffer_size, aclmdlDataset *dataset) {
  if (data_mem_buffer == nullptr) {
    MS_LOG(ERROR) << "Data mem buffer is nullptr.";
    return lite::RET_ERROR;
  }
  aclError ret;
  // Frees with the allocator that matches how the memory was obtained below.
  auto free_data_buffer = [this](void *dataMemBuffer) {
    if (!is_run_on_device_) {
      (void)aclrtFree(dataMemBuffer);
    } else {
      (void)aclrtFreeHost(dataMemBuffer);
    }
  };

  if (!is_run_on_device_) {
    ret = aclrtMalloc(data_mem_buffer, buffer_size, ACL_MEM_MALLOC_NORMAL_ONLY);
    if (ret != ACL_ERROR_NONE) {
      MS_LOG(ERROR) << "Malloc device buffer failed , buffer size " << buffer_size;
      return lite::RET_ERROR;
    }
  } else {
    ret = aclrtMallocHost(data_mem_buffer, buffer_size);
    if (ret != ACL_ERROR_NONE) {
      MS_LOG(ERROR) << "Malloc host buffer failed , buffer size " << buffer_size;
      return lite::RET_ERROR;
    }
  }

  auto data_buffer = aclCreateDataBuffer(*data_mem_buffer, buffer_size);
  if (data_buffer == nullptr) {
    MS_LOG(ERROR) << "Create Data Buffer failed";
    free_data_buffer(*data_mem_buffer);
    return lite::RET_ERROR;
  }
  ret = aclmdlAddDatasetBuffer(dataset, data_buffer);
  if (ret != ACL_ERROR_NONE) {
    MS_LOG(ERROR) << "add data buffer failed";
    free_data_buffer(*data_mem_buffer);
    aclDestroyDataBuffer(data_buffer);
    return lite::RET_ERROR;
  }
  return lite::RET_OK;
}
|
||||
|
||||
// Creates the acl output dataset and, for every model output, allocates a
// backing buffer and records its metadata in output_infos_.
// Fix: the aclmdlGetOutputDims failure path used to return RET_OK, letting
// callers continue with an incomplete output_infos_; it now returns
// RET_ERROR like every other failure here.
STATUS ModelProcess::InitOutputsBuffer() {
  aclError ret;
  outputs_ = aclmdlCreateDataset();
  if (outputs_ == nullptr) {
    MS_LOG(ERROR) << "Create output dataset failed";
    return lite::RET_ERROR;
  }
  size_t output_size = aclmdlGetNumOutputs(model_desc_);
  MS_LOG(INFO) << "Output_size = " << output_size;
  for (size_t i = 0; i < output_size; ++i) {
    auto buffer_size = aclmdlGetOutputSizeByIndex(model_desc_, i);

    void *data_mem_buffer = nullptr;
    if (CreateDataBuffer(&data_mem_buffer, buffer_size, outputs_) != lite::RET_OK) {
      MS_LOG(ERROR) << "Add output data buffer failed, buffer size " << buffer_size;
      return lite::RET_ERROR;
    }
    aclmdlIODims dims;
    ret = aclmdlGetOutputDims(model_desc_, i, &dims);
    if (ret != ACL_ERROR_NONE) {
      MS_LOG(ERROR) << "Get output shape failed";
      // Release this output's buffer with the matching allocator.
      if (!is_run_on_device_) {
        aclrtFree(data_mem_buffer);
      } else {
        aclrtFreeHost(data_mem_buffer);
      }
      return lite::RET_ERROR;
    }
    aclFormat format = aclmdlGetOutputFormat(model_desc_, i);
    MS_LOG(DEBUG) << "The output format of om is " << format;
    aclDataType data_type = aclmdlGetOutputDataType(model_desc_, i);
    std::vector<int64_t> shape(dims.dims, dims.dims + dims.dimCount);
    std::string output_name = aclmdlGetOutputNameByIndex(model_desc_, i);
    if (output_name.empty()) {
      MS_LOG(WARNING) << "Get name of output " << i << " failed.";
    }
    MS_LOG(INFO) << "Name of om output " << i << " is " << output_name << "Buffer size " << buffer_size;
    output_infos_.emplace_back(
      AclTensorInfo{data_mem_buffer, data_mem_buffer, buffer_size, data_type, shape, output_name});
  }
  MS_LOG(INFO) << "Create model output success.";
  return lite::RET_OK;
}
|
||||
|
||||
void ModelProcess::DestroyInputsDataset() {
|
||||
if (inputs_ == nullptr) {
|
||||
return;
|
||||
}
|
||||
for (size_t i = 0; i < aclmdlGetDatasetNumBuffers(inputs_); i++) {
|
||||
auto dataBuffer = aclmdlGetDatasetBuffer(inputs_, i);
|
||||
aclDestroyDataBuffer(dataBuffer);
|
||||
}
|
||||
aclmdlDestroyDataset(inputs_);
|
||||
inputs_ = nullptr;
|
||||
}
|
||||
|
||||
void ModelProcess::DestroyInputsDataMem() {
|
||||
if (!is_run_on_device_) {
|
||||
for (const auto &item : input_infos_) {
|
||||
aclrtFree(item.device_data);
|
||||
}
|
||||
}
|
||||
input_infos_.clear();
|
||||
}
|
||||
|
||||
// Frees the per-input device memory first, then the acl input dataset.
void ModelProcess::DestroyInputsBuffer() {
  DestroyInputsDataMem();
  DestroyInputsDataset();
}
|
||||
|
||||
void ModelProcess::DestroyOutputsBuffer() {
|
||||
for (const auto &item : output_infos_) {
|
||||
if (!is_run_on_device_) {
|
||||
aclrtFree(item.device_data);
|
||||
} else {
|
||||
aclrtFreeHost(item.device_data);
|
||||
}
|
||||
}
|
||||
output_infos_.clear();
|
||||
|
||||
if (outputs_ == nullptr) {
|
||||
return;
|
||||
}
|
||||
for (size_t i = 0; i < aclmdlGetDatasetNumBuffers(outputs_); i++) {
|
||||
auto dataBuffer = aclmdlGetDatasetBuffer(outputs_, i);
|
||||
aclDestroyDataBuffer(dataBuffer);
|
||||
}
|
||||
aclmdlDestroyDataset(outputs_);
|
||||
outputs_ = nullptr;
|
||||
}
|
||||
|
||||
// Unloads the model from the runtime, destroys the model description and
// releases all input/output buffers.
// Fix: the aclmdlDestroyDesc failure message duplicated the "Unload model
// failed" text, hiding which call actually failed.
STATUS ModelProcess::UnLoad() {
  auto ret = aclmdlUnload(model_id_);
  if (ret != ACL_ERROR_NONE) {
    MS_LOG(ERROR) << "Unload model failed, ret = " << ret;
    return lite::RET_ERROR;
  }
  if (model_desc_ != nullptr) {
    ret = aclmdlDestroyDesc(model_desc_);
    if (ret != ACL_ERROR_NONE) {
      MS_LOG(ERROR) << "Destroy model desc failed, ret = " << ret;
      return lite::RET_ERROR;
    }
    model_desc_ = nullptr;
  }
  DestroyInputsBuffer();
  DestroyOutputsBuffer();
  MS_LOG(INFO) << "End unload model " << model_id_;
  return lite::RET_OK;
}
|
||||
|
||||
// Applies the dynamic batch size for the coming execution. The real batch
// value is carried by the last tensor in `inputs` as a single int32
// (presumably appended by DynShapeProcess::AddBatchSizeInput — verify
// against callers).
STATUS ModelProcess::SetBatchSize(const std::vector<KernelTensorPtr> &inputs) {
  // Refresh the expected byte sizes from the caller-provided tensors.
  // NOTE(review): assumes input_infos_.size() >= inputs.size() — confirm.
  for (size_t i = 0; i < inputs.size(); i++) {
    input_infos_[i].buffer_size = inputs[i]->GetData()->size;
  }
  auto batch_size_tensor = inputs[inputs.size() - 1];
  size_t data_type_size = lite::DataTypeSize(batch_size_tensor->GetDtype());
  size_t num = 0;
  if (data_type_size != 0) {
    num = batch_size_tensor->GetData()->size / data_type_size;
  }
  if (num != kBatchSizeNum) {
    MS_LOG(ERROR) << "Batch size num should be " << kBatchSizeNum;
    return lite::RET_ERROR;
  }
  auto *ptr = reinterpret_cast<const int32_t *>(batch_size_tensor->GetData()->addr);
  CHECK_NULL_RETURN(ptr);
  auto batch_size = ptr[0];
  aclError ret;
  size_t index;
  // The dynamic dimension is configured through ACL's reserved input.
  ret = aclmdlGetInputIndexByName(model_desc_, ACL_DYNAMIC_TENSOR_NAME, &index);
  if (ret != ACL_ERROR_NONE) {
    MS_LOG(ERROR) << "Get index failed";
    return lite::RET_ERROR;
  }
  MS_LOG(INFO) << "Set Batch size(" << batch_size << ") of input " << index << ".";
  ret = aclmdlSetDynamicBatchSize(model_id_, inputs_, index, batch_size);
  if (ret != ACL_ERROR_NONE) {
    MS_LOG(ERROR) << "Set dynamic batch size failed, model_id is " << model_id_;
    return lite::RET_ERROR;
  }
  return lite::RET_OK;
}
|
||||
|
||||
// Applies the dynamic image size (height, width) for the coming execution.
// The real H/W pair is carried by the last tensor in `inputs` as two int32
// values.
// Fix: the failure log for aclmdlSetDynamicHWSize said "batch size" —
// copy-paste from SetBatchSize — and now names the right operation.
STATUS ModelProcess::SetImageSize(const std::vector<KernelTensorPtr> &inputs) {
  // Refresh the expected byte sizes from the caller-provided tensors.
  for (size_t i = 0; i < inputs.size(); i++) {
    input_infos_[i].buffer_size = inputs[i]->GetData()->size;
  }
  auto image_size_tensor = inputs[inputs.size() - 1];
  size_t data_type_size = lite::DataTypeSize(image_size_tensor->GetDtype());
  size_t num = 0;
  if (data_type_size != 0) {
    num = image_size_tensor->GetData()->size / data_type_size;
  }
  if (num != kImageSizeHwNum) {
    MS_LOG(ERROR) << "Image size hw num should be " << kImageSizeHwNum;
    return lite::RET_ERROR;
  }
  auto *hw = reinterpret_cast<const int32_t *>(image_size_tensor->GetData()->addr);
  CHECK_NULL_RETURN(hw);
  int32_t height = hw[0];
  int32_t width = hw[1];
  size_t index;
  aclError ret = ACL_ERROR_NONE;
  // The dynamic dimension is configured through ACL's reserved input.
  ret = aclmdlGetInputIndexByName(model_desc_, ACL_DYNAMIC_TENSOR_NAME, &index);
  if (ret != ACL_ERROR_NONE) {
    MS_LOG(ERROR) << "Get index failed";
    return lite::RET_ERROR;
  }
  MS_LOG(INFO) << "Set Image size(" << height << "," << width << ") of input " << index << ".";
  ret = aclmdlSetDynamicHWSize(model_id_, inputs_, index, height, width);
  if (ret != ACL_ERROR_NONE) {
    MS_LOG(ERROR) << "Set dynamic image size failed, model_id is " << model_id_;
    return lite::RET_ERROR;
  }
  return lite::RET_OK;
}
|
||||
|
||||
// For static-shape models, verifies each user tensor against the model's
// description: shape mismatch is only a warning (DVPP may legally change it),
// data-type and byte-size mismatches are hard errors. Dynamic-shape models
// are validated later when the dynamic dims are applied.
// Fix: guard against fewer user tensors than model inputs before indexing
// tensor[i] (previously a potential out-of-range access).
STATUS ModelProcess::CheckTensorByTensorInfo(const std::vector<KernelTensorPtr> &tensor,
                                             const std::vector<AclTensorInfo> &tensor_info) {
  if (!IsDynamicShape()) {
    if (tensor.size() < tensor_info.size()) {
      MS_LOG(ERROR) << "Tensor count " << tensor.size() << " less than required count " << tensor_info.size();
      return lite::RET_ERROR;
    }
    for (size_t i = 0; i < tensor_info.size(); ++i) {
      if (tensor[i]->GetShapeVector() != tensor_info[i].dims) {
        MS_LOG(WARNING) << "Note: input " << i << " shape not match, required " << ShapeToString(tensor_info[i].dims)
                        << ", given " << ShapeToString(tensor[i]->GetShapeVector()) << "."
                        << "Please check input shape has been modified by DVPP method.";
      }
      if (tensor[i]->GetDtype() != TransToDataType(tensor_info[i].data_type)) {
        MS_LOG(ERROR) << "Note: input " << i << " data type not match, required "
                      << static_cast<int>(TransToDataType(tensor_info[i].data_type)) << ", given "
                      << static_cast<int>(tensor[i]->GetDtype());
        return lite::RET_ERROR;
      }
      if (tensor[i]->GetData()->size != tensor_info[i].buffer_size) {
        MS_LOG(ERROR) << "Input " << i << " data size not match, required size " << tensor_info[i].buffer_size
                      << ", given count " << tensor[i]->GetData()->size;
        return lite::RET_ERROR;
      }
    }
  }
  return lite::RET_OK;
}
|
||||
|
||||
// Applies the dynamic batch / dynamic image settings carried by `inputs`
// and then refreshes the expected output sizes. Models without dynamic
// shape return immediately.
STATUS ModelProcess::ProcDynamicShape(const std::vector<KernelTensorPtr> &inputs) {
  if (!IsDynamicShape()) {
    MS_LOG(DEBUG) << "Input is not dynamic shape";
    return lite::RET_OK;
  }
  if (IsDynamicBatchSize() && SetBatchSize(inputs) != lite::RET_OK) {
    MS_LOG(ERROR) << "Set dynamic batch size failed.";
    return lite::RET_ERROR;
  }
  if (IsDynamicImageSize() && SetImageSize(inputs) != lite::RET_OK) {
    MS_LOG(ERROR) << "Set dynamic image size failed.";
    return lite::RET_ERROR;
  }
  // Output shapes depend on the dynamic dims just set.
  if (ResetOutputSize() != lite::RET_OK) {
    MS_LOG(ERROR) << "Reset output size failed";
    return lite::RET_ERROR;
  }
  return lite::RET_OK;
}
|
||||
|
||||
// A model is dynamic if it supports either dynamic batch or dynamic image size.
bool ModelProcess::IsDynamicShape() { return IsDynamicBatchSize() || IsDynamicImageSize(); }

// True when the model description advertises at least one dynamic batch value.
bool ModelProcess::IsDynamicBatchSize() { return !GetDynamicBatch().empty(); }

// True when the model description advertises at least one dynamic H/W pair.
bool ModelProcess::IsDynamicImageSize() { return !GetDynamicImage().empty(); }
|
||||
|
||||
// Builds the acl input dataset for one execution: validates the user tensors
// against the model description, stages host data into the preallocated
// device buffers (skipped when running directly on device), and applies
// dynamic-shape settings. The dataset is torn down by DestroyInputsDataset()
// after the execute call.
STATUS ModelProcess::CheckAndInitInput(const std::vector<KernelTensorPtr> &inputs) {
  aclError ret;
  inputs_ = aclmdlCreateDataset();
  // check inputs
  if (CheckTensorByTensorInfo(inputs, input_infos_) != lite::RET_OK) {
    MS_LOG(ERROR) << "Check input tensor failed.";
    return lite::RET_ERROR;
  }
  // copy inputs
  for (size_t i = 0; i < input_infos_.size(); ++i) {
    auto &info = input_infos_[i];
    auto input = inputs[i];
    void *data = input->GetData()->addr;
    void *input_buffer = nullptr;
    if (!is_run_on_device_) {
      // Stage the host data into this input's reusable device buffer.
      info.cur_device_data = info.device_data;
      ret =
        aclrtMemcpy(info.cur_device_data, info.buffer_size, data, input->GetData()->size, ACL_MEMCPY_HOST_TO_DEVICE);
      if (ret != ACL_ERROR_NONE) {
        MS_LOG(ERROR) << "Acl memcpy input " << i
                      << " data to device failed, src input size: " << input->GetData()->size
                      << ", dst device buffer size: " << info.buffer_size;
        return lite::RET_ERROR;
      }
      input_buffer = info.cur_device_data;
    } else {
      // On-device run: the model can read the caller's buffer directly.
      input_buffer = data;
    }
    auto data_buffer = aclCreateDataBuffer(input_buffer, info.buffer_size);
    if (data_buffer == nullptr) {
      MS_LOG(ERROR) << "Create Data Buffer failed";
      return lite::RET_ERROR;
    }
    ret = aclmdlAddDatasetBuffer(inputs_, data_buffer);
    if (ret != ACL_ERROR_NONE) {
      MS_LOG(ERROR) << "Add data buffer failed";
      aclDestroyDataBuffer(data_buffer);
      return lite::RET_ERROR;
    }
  }
  if (ProcDynamicShape(inputs) != lite::RET_OK) {
    MS_LOG(ERROR) << "Proc input dynamic shape failed.";
    return lite::RET_ERROR;
  }
  return lite::RET_OK;
}
|
||||
|
||||
// Re-reads the now-resolved output shapes (after the dynamic dims have been
// fixed for this run) and updates dims / buffer_size in output_infos_.
STATUS ModelProcess::ResetOutputSize() {
  aclDataType output_type;
  aclError ret;
  size_t output_size = aclmdlGetNumOutputs(model_desc_);
  for (size_t index = 0; index < output_size; index++) {
    struct aclmdlIODims output_dims;
    ret = aclmdlGetCurOutputDims(model_desc_, index, &output_dims);
    if (ret != ACL_ERROR_NONE) {
      MS_LOG(ERROR) << "get output dim error.";
      return lite::RET_ERROR;
    }
    std::vector<int64_t> shape(output_dims.dims, output_dims.dims + output_dims.dimCount);
    size_t element_count = 1;
    for (size_t i = 0; i < output_dims.dimCount; i++) {
      element_count *= output_dims.dims[i];
    }
    output_type = aclmdlGetOutputDataType(model_desc_, index);
    output_infos_[index].dims = shape;
    output_infos_[index].buffer_size = element_count * aclDataTypeSize(output_type);
  }
  return lite::RET_OK;
}
|
||||
|
||||
// Runs one inference: stages the inputs, executes the model (wall-clock
// timed when GLOG_v==1), then builds the caller-visible outputs. The per-run
// input dataset is destroyed on every path.
STATUS ModelProcess::PredictFromHost(const std::vector<KernelTensorPtr> &inputs,
                                     const std::vector<KernelTensorPtr> &outputs) {
  STATUS ret = CheckAndInitInput(inputs);
  if (ret != lite::RET_OK) {
    MS_LOG(ERROR) << "Check or init input failed";
    DestroyInputsDataset();
    return ret;  // forward status error
  }

  aclError acl_ret;
  // With verbose logging enabled, wrap the execute call with timing.
  auto env = std::getenv("GLOG_v");
  if (env != nullptr && env[0] == '1') {
    struct timeval start_time;
    struct timeval end_time;
    (void)gettimeofday(&start_time, nullptr);
    acl_ret = aclmdlExecute(model_id_, inputs_, outputs_);
    (void)gettimeofday(&end_time, nullptr);
    constexpr uint64_t kUSecondInSecond = 1000000;
    uint64_t cost =
      (kUSecondInSecond * static_cast<uint64_t>(end_time.tv_sec) + static_cast<uint64_t>(end_time.tv_usec)) -
      (kUSecondInSecond * static_cast<uint64_t>(start_time.tv_sec) + static_cast<uint64_t>(start_time.tv_usec));
    MS_LOG(INFO) << "Model execute in " << cost << " us";
  } else {
    acl_ret = aclmdlExecute(model_id_, inputs_, outputs_);
  }

  // The input dataset is rebuilt per call, so release it regardless of result.
  DestroyInputsDataset();
  if (acl_ret != ACL_ERROR_NONE) {
    MS_LOG(ERROR) << "Execute Model Failed, ret = " << acl_ret;
    return lite::RET_ERROR;
  }
  ret = GetOutputs(outputs);
  if (ret != lite::RET_OK) {
    MS_LOG(ERROR) << "Build outputs failed";
    return ret;
  }
  MS_LOG(INFO) << "Execute model success";
  return lite::RET_OK;
}
|
||||
|
||||
STATUS ModelProcess::GetOutputs(const std::vector<KernelTensorPtr> &outputs) {
|
||||
if (outputs.empty()) {
|
||||
MS_LOG(ERROR) << "Ms tensor outputs is empty.";
|
||||
return lite::RET_ERROR;
|
||||
}
|
||||
|
||||
if (ConstructTensor(outputs) != lite::RET_OK) {
|
||||
MS_LOG(ERROR) << "Construct ms tensor failed.";
|
||||
return lite::RET_ERROR;
|
||||
}
|
||||
return lite::RET_OK;
|
||||
}
|
||||
|
||||
STATUS ModelProcess::ConstructTensor(const std::vector<KernelTensorPtr> &outputs) {
|
||||
if (outputs.size() != output_infos_.size()) {
|
||||
MS_LOG(ERROR) << "Actual tensor count not match, required count " << output_infos_.size() << ", given count "
|
||||
<< outputs.size();
|
||||
return lite::RET_ERROR;
|
||||
}
|
||||
std::vector<std::string> names;
|
||||
std::vector<std::vector<int64_t>> shapes;
|
||||
std::vector<enum TypeId> data_types;
|
||||
std::vector<size_t> mem_sizes;
|
||||
if (ConstructTensorDesc(output_infos_, &names, &shapes, &data_types, &mem_sizes) != lite::RET_OK) {
|
||||
MS_LOG(ERROR) << "Construct tensor desc failed.";
|
||||
return lite::RET_ERROR;
|
||||
}
|
||||
// set output info and malloc data size
|
||||
for (size_t i = 0; i < output_infos_.size(); ++i) {
|
||||
if (outputs[i]->GetData()->size != mem_sizes[i]) {
|
||||
MS_LOG(ERROR) << "Ms tensor size " << outputs[i]->GetData()->size << " not match model tensor size "
|
||||
<< mem_sizes[i];
|
||||
return lite::RET_ERROR;
|
||||
}
|
||||
}
|
||||
aclrtMemcpyKind kind = is_run_on_device_ ? ACL_MEMCPY_HOST_TO_HOST : ACL_MEMCPY_DEVICE_TO_HOST;
|
||||
for (size_t i = 0; i < output_infos_.size(); ++i) {
|
||||
if (output_infos_[i].cur_device_data == nullptr) {
|
||||
// when run on device, cur_device_data is nullptr before first execute
|
||||
continue;
|
||||
}
|
||||
auto ret = aclrtMemcpy(outputs[i]->GetData()->addr, outputs[i]->GetData()->size, output_infos_[i].cur_device_data,
|
||||
output_infos_[i].buffer_size, kind);
|
||||
if (ret != ACL_ERROR_NONE) {
|
||||
MS_LOG(ERROR) << "Memcpy input " << i << " from " << (is_run_on_device_ ? "host" : "device")
|
||||
<< " to host failed, memory size " << output_infos_[i].buffer_size;
|
||||
return lite::RET_ERROR;
|
||||
}
|
||||
}
|
||||
return lite::RET_OK;
|
||||
}
|
||||
} // namespace acl
|
||||
} // namespace mindspore::kernel
|
|
@ -0,0 +1,104 @@
|
|||
/**
|
||||
* Copyright 2021 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef MINDSPORE_LITE_SRC_EXTENDRT_KERNEL_ASCEND_MODEL_MODEL_PROCESS_H_
|
||||
#define MINDSPORE_LITE_SRC_EXTENDRT_KERNEL_ASCEND_MODEL_MODEL_PROCESS_H_
|
||||
|
||||
#include <vector>
|
||||
#include <string>
|
||||
#include <map>
|
||||
#include <set>
|
||||
#include <utility>
|
||||
#include "acl/acl.h"
|
||||
#include "acl/acl_mdl.h"
|
||||
#include "acl/acl_rt.h"
|
||||
#include "include/api/types.h"
|
||||
#include "include/errorcode.h"
|
||||
#include "kernel/kernel.h"
|
||||
#include "extendrt/kernel/ascend/options/acl_model_options.h"
|
||||
|
||||
namespace mindspore::kernel {
|
||||
namespace acl {
|
||||
using mindspore::lite::STATUS;
|
||||
struct AclTensorInfo {
|
||||
void *cur_device_data;
|
||||
void *device_data;
|
||||
size_t buffer_size;
|
||||
aclDataType data_type;
|
||||
std::vector<int64_t> dims;
|
||||
std::string name;
|
||||
};
|
||||
|
||||
class ModelProcess {
|
||||
public:
|
||||
explicit ModelProcess(const AclModelOptionsPtr &options)
|
||||
: options_(options),
|
||||
model_id_(0xffffffff),
|
||||
is_run_on_device_(false),
|
||||
model_desc_(nullptr),
|
||||
inputs_(nullptr),
|
||||
outputs_(nullptr),
|
||||
input_infos_(),
|
||||
output_infos_() {}
|
||||
~ModelProcess() {}
|
||||
|
||||
STATUS UnLoad();
|
||||
STATUS PredictFromHost(const std::vector<KernelTensorPtr> &inputs, const std::vector<KernelTensorPtr> &outputs);
|
||||
STATUS PreInitModelResource();
|
||||
|
||||
// override this method to avoid request/reply data copy
|
||||
void SetIsDevice(bool is_device) { is_run_on_device_ = is_device; }
|
||||
|
||||
void set_model_id(uint32_t model_id) { model_id_ = model_id; }
|
||||
uint32_t model_id() const { return model_id_; }
|
||||
std::set<uint64_t> GetDynamicBatch();
|
||||
std::set<std::pair<uint64_t, uint64_t>> GetDynamicImage();
|
||||
|
||||
private:
|
||||
STATUS CreateDataBuffer(void **data_mem_buffer, size_t buffer_size, aclmdlDataset *dataset);
|
||||
STATUS CheckAndInitInput(const std::vector<KernelTensorPtr> &inputs);
|
||||
STATUS CheckTensorByTensorInfo(const std::vector<KernelTensorPtr> &tensor,
|
||||
const std::vector<AclTensorInfo> &tensor_info);
|
||||
STATUS GetOutputs(const std::vector<KernelTensorPtr> &outputs);
|
||||
STATUS ConstructTensor(const std::vector<KernelTensorPtr> &outputs);
|
||||
STATUS SetBatchSize(const std::vector<KernelTensorPtr> &inputs);
|
||||
STATUS SetImageSize(const std::vector<KernelTensorPtr> &inputs);
|
||||
STATUS InitInputsBuffer();
|
||||
STATUS InitOutputsBuffer();
|
||||
STATUS ResetOutputSize();
|
||||
STATUS ProcDynamicShape(const std::vector<KernelTensorPtr> &inputs);
|
||||
std::string VectorToString(const std::vector<int64_t> &);
|
||||
bool IsDynamicShape();
|
||||
bool IsDynamicBatchSize();
|
||||
bool IsDynamicImageSize();
|
||||
void DestroyInputsDataset();
|
||||
void DestroyInputsDataMem();
|
||||
void DestroyInputsBuffer();
|
||||
void DestroyOutputsBuffer();
|
||||
|
||||
AclModelOptionsPtr options_;
|
||||
uint32_t model_id_;
|
||||
// if run one device(AICPU), there is no need to alloc device memory and copy inputs to(/outputs from) device
|
||||
bool is_run_on_device_;
|
||||
aclmdlDesc *model_desc_;
|
||||
aclmdlDataset *inputs_;
|
||||
aclmdlDataset *outputs_;
|
||||
std::vector<AclTensorInfo> input_infos_;
|
||||
std::vector<AclTensorInfo> output_infos_;
|
||||
};
|
||||
} // namespace acl
|
||||
} // namespace mindspore::kernel
|
||||
#endif // MINDSPORE_LITE_SRC_EXTENDRT_KERNEL_ASCEND_MODEL_MODEL_PROCESS_H_
|
|
@ -0,0 +1,39 @@
|
|||
/**
|
||||
* Copyright 2021 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef MINDSPORE_LITE_SRC_EXTENDRT_KERNEL_ASCEND_SRC_ACL_MODEL_OPTIONS_H_
|
||||
#define MINDSPORE_LITE_SRC_EXTENDRT_KERNEL_ASCEND_SRC_ACL_MODEL_OPTIONS_H_
|
||||
|
||||
#include <string>
|
||||
#include <set>
|
||||
#include <utility>
|
||||
#include <memory>
|
||||
|
||||
namespace mindspore::kernel {
|
||||
namespace acl {
|
||||
struct AclModelOptions {
|
||||
int32_t device_id;
|
||||
std::string dump_cfg_path;
|
||||
std::set<uint64_t> batch_size;
|
||||
std::set<std::pair<uint64_t, uint64_t>> image_size;
|
||||
|
||||
AclModelOptions() : device_id(0) {}
|
||||
};
|
||||
|
||||
using AclModelOptionsPtr = std::shared_ptr<AclModelOptions>;
|
||||
} // namespace acl
|
||||
} // namespace mindspore::kernel
|
||||
#endif // MINDSPORE_LITE_SRC_EXTENDRT_KERNEL_ASCEND_SRC_ACL_MODEL_OPTIONS_H_
|
|
@ -0,0 +1,80 @@
|
|||
/**
|
||||
* Copyright 2022 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "extendrt/kernel/ascend/options/acl_options_parser.h"
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
#include "common/log_adapter.h"
|
||||
#include "src/common/log_util.h"
|
||||
#include "src/common/utils.h"
|
||||
#include "acl/acl_base.h"
|
||||
#include "acl/acl_rt.h"
|
||||
|
||||
namespace mindspore::kernel {
|
||||
namespace acl {
|
||||
constexpr auto kImageHwNum = 2;
|
||||
|
||||
STATUS AclOptionsParser::ParseAclOptions(const mindspore::Context *ctx, AclModelOptionsPtr *const acl_options) {
|
||||
CHECK_NULL_RETURN(ctx);
|
||||
CHECK_NULL_RETURN(acl_options);
|
||||
|
||||
auto context = const_cast<mindspore::Context *>(ctx);
|
||||
CHECK_NULL_RETURN(context);
|
||||
auto device_infos = context->MutableDeviceInfo();
|
||||
if (device_infos.size() < 1) {
|
||||
MS_LOG(WARNING) << "Context is not set device info, please check.";
|
||||
return lite::RET_OK;
|
||||
}
|
||||
CHECK_NULL_RETURN(device_infos[0]);
|
||||
if (ParseOptions(device_infos[0], acl_options) != lite::RET_OK) {
|
||||
MS_LOG(ERROR) << "Parse model options failed.";
|
||||
return lite::RET_ERROR;
|
||||
}
|
||||
return lite::RET_OK;
|
||||
}
|
||||
|
||||
STATUS AclOptionsParser::ParseOptions(const std::shared_ptr<DeviceInfoContext> &device_info,
|
||||
AclModelOptions *acl_options) {
|
||||
auto ascend_info = device_info->Cast<mindspore::AscendDeviceInfo>();
|
||||
if (ascend_info == nullptr) {
|
||||
MS_LOG(ERROR) << "There is no ascend info.";
|
||||
return lite::RET_ERROR;
|
||||
}
|
||||
int32_t device_id = static_cast<int32_t>(ascend_info->GetDeviceID());
|
||||
if (CheckDeviceId(&device_id) != lite::RET_OK) {
|
||||
MS_LOG(ERROR) << "Check device id failed, device id = " << device_id;
|
||||
return lite::RET_ERROR;
|
||||
}
|
||||
acl_options->device_id = device_id;
|
||||
return lite::RET_OK;
|
||||
}
|
||||
|
||||
STATUS AclOptionsParser::CheckDeviceId(int32_t *device_id) {
|
||||
CHECK_NULL_RETURN(device_id);
|
||||
uint32_t device_count;
|
||||
if (aclrtGetDeviceCount(&device_count) != ACL_ERROR_NONE) {
|
||||
MS_LOG(WARNING) << "Get device count failed.";
|
||||
return lite::RET_OK;
|
||||
}
|
||||
if (*device_id >= static_cast<int32_t>(device_count)) {
|
||||
MS_LOG(ERROR) << "Current device id " << *device_id << " is larger than max count " << device_count
|
||||
<< ",please check the device info of context.";
|
||||
return lite::RET_ERROR;
|
||||
}
|
||||
return lite::RET_OK;
|
||||
}
|
||||
} // namespace acl
|
||||
} // namespace mindspore::kernel
|
|
@ -0,0 +1,41 @@
|
|||
/**
|
||||
* Copyright 2021 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef MINDSPORE_LITE_SRC_EXTENDRT_KERNEL_ASCEND_SRC_ACL_OPTIONS_PARSER_H_
|
||||
#define MINDSPORE_LITE_SRC_EXTENDRT_KERNEL_ASCEND_SRC_ACL_OPTIONS_PARSER_H_
|
||||
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include "include/api/context.h"
|
||||
#include "include/errorcode.h"
|
||||
#include "extendrt/kernel/ascend/options/acl_model_options.h"
|
||||
|
||||
namespace mindspore::kernel {
|
||||
namespace acl {
|
||||
using mindspore::lite::STATUS;
|
||||
|
||||
class AclOptionsParser {
|
||||
public:
|
||||
STATUS ParseAclOptions(const mindspore::Context *ctx, AclModelOptionsPtr *const acl_options);
|
||||
|
||||
private:
|
||||
STATUS ParseOptions(const std::shared_ptr<DeviceInfoContext> &device_info, AclModelOptions *acl_options);
|
||||
STATUS CheckDeviceId(int32_t *device_id);
|
||||
};
|
||||
} // namespace acl
|
||||
} // namespace mindspore::kernel
|
||||
|
||||
#endif // MINDSPORE_LITE_SRC_EXTENDRT_KERNEL_ASCEND_SRC_ACL_OPTIONS_PARSER_H_
|
|
@ -52,16 +52,19 @@ Status SingleOpInferSession::CompileGraph(FuncGraphPtr graph) {
|
|||
for (const auto &kernel_node : kernel_nodes) {
|
||||
mindspore::infer::SetKernelInfo(kernel_node);
|
||||
std::string kernel_name = common::AnfAlgo::GetCNodeName(kernel_node);
|
||||
std::shared_ptr<kernel::CpuKernelMod> cpu_kernel_mod =
|
||||
kernel::Factory<kernel::CpuKernelMod>::Instance().Create(kernel_name);
|
||||
std::shared_ptr<kernel::KernelMod> kernel_mod = kernel::Factory<kernel::KernelMod>::Instance().Create(kernel_name);
|
||||
MS_LOG(INFO) << "SingleOpInferSession::Kernels " << kernel_name;
|
||||
auto args = kernel::AbstractArgsFromCNode(kernel_node);
|
||||
auto ret = cpu_kernel_mod->Init(args.op, args.inputs, args.outputs);
|
||||
if (kernel_mod == nullptr) {
|
||||
MS_LOG(EXCEPTION) << "Kernel mod is nullptr, kernel name: " << kernel_name;
|
||||
}
|
||||
mindspore::infer::CopyInputWeights(kernel_node, args.inputs);
|
||||
auto ret = kernel_mod->Init(args.op, args.inputs, args.outputs);
|
||||
MS_LOG(INFO) << "SingleOpInferSession::Kernels ret " << ret;
|
||||
if (!ret) {
|
||||
MS_LOG(EXCEPTION) << "kernel init failed " << kernel_name;
|
||||
}
|
||||
if (cpu_kernel_mod->Resize(args.op, args.inputs, args.outputs, kernel::GetKernelDepends(kernel_node)) ==
|
||||
if (kernel_mod->Resize(args.op, args.inputs, args.outputs, kernel::GetKernelDepends(kernel_node)) ==
|
||||
kernel::KRET_RESIZE_FAILED) {
|
||||
MS_LOG(EXCEPTION) << "CPU kernel op [" << kernel_node->fullname_with_scope() << "] Resize failed.";
|
||||
}
|
||||
|
@ -90,10 +93,10 @@ Status SingleOpInferSession::CompileGraph(FuncGraphPtr graph) {
|
|||
tensor_size = std::max(tensor_size, type_size);
|
||||
(void)output_size_list.emplace_back(tensor_size);
|
||||
}
|
||||
cpu_kernel_mod->SetInputSizeList(input_size_list);
|
||||
cpu_kernel_mod->SetOutputSizeList(output_size_list);
|
||||
kernel_mod->SetInputSizeList(input_size_list);
|
||||
kernel_mod->SetOutputSizeList(output_size_list);
|
||||
|
||||
AnfAlgo::SetKernelMod(cpu_kernel_mod, kernel_node.get());
|
||||
AnfAlgo::SetKernelMod(kernel_mod, kernel_node.get());
|
||||
}
|
||||
|
||||
this->AssignKernelGraphAddress(kernel_graph_);
|
||||
|
@ -284,9 +287,29 @@ device::DeviceAddressPtr SingleOpInferSession::CreateDeviceAddress(void *device_
|
|||
return std::make_shared<InferDeviceAddress>(device_ptr, device_size, format, type_id);
|
||||
}
|
||||
|
||||
std::vector<AnfNodePtr> SingleOpInferSession::GetGraphDataInputs() const {
|
||||
MS_EXCEPTION_IF_NULL(kernel_graph_);
|
||||
std::vector<AnfNodePtr> data_inputs;
|
||||
auto inputs = kernel_graph_->inputs();
|
||||
for (auto input : inputs) {
|
||||
if (input->isa<Parameter>()) {
|
||||
auto parameter = input->cast<ParameterPtr>();
|
||||
if (parameter != nullptr && !parameter->has_default()) {
|
||||
data_inputs.push_back(input);
|
||||
}
|
||||
}
|
||||
}
|
||||
return data_inputs;
|
||||
}
|
||||
|
||||
void SingleOpInferSession::CopyInputs(const std::vector<tensor::TensorPtr> inputs) {
|
||||
MS_EXCEPTION_IF_NULL(kernel_graph_);
|
||||
auto graph_inputs = kernel_graph_->inputs();
|
||||
auto graph_inputs = GetGraphDataInputs();
|
||||
if (graph_inputs.size() != inputs.size()) {
|
||||
MS_LOG(ERROR) << "Graph inputs size[" << graph_inputs.size() << "] is not equal to User input size[ "
|
||||
<< inputs.size() << "].";
|
||||
return;
|
||||
}
|
||||
for (size_t i = 0; i < graph_inputs.size(); i++) {
|
||||
auto input = inputs[i];
|
||||
auto graph_input = graph_inputs[i];
|
||||
|
|
|
@ -49,6 +49,7 @@ class SingleOpInferSession : public InferSession {
|
|||
device::DeviceAddressPtr CreateDeviceAddress(void *device_ptr, size_t device_size, const string &format,
|
||||
TypeId type_id) const;
|
||||
void CopyInputs(const std::vector<tensor::TensorPtr> inputs);
|
||||
std::vector<AnfNodePtr> GetGraphDataInputs() const;
|
||||
void CopyOutputs(std::vector<tensor::TensorPtr> *outputs);
|
||||
|
||||
private:
|
||||
|
|
|
@ -35,6 +35,9 @@ using AnfAlgo = mindspore::session::AnfRuntimeAlgorithm;
|
|||
using mindspore::kernel::KernelBuildInfo;
|
||||
namespace {
|
||||
constexpr auto kParamDynamic = "dynamic";
|
||||
constexpr auto kCustomAscendInputNum = 3;
|
||||
constexpr auto kNameCustomAscend = "CustomAscend";
|
||||
constexpr auto kCustomTypeAscend = "acl_build";
|
||||
|
||||
bool IsInputNotCNode(const CNodePtr &kernel_node, size_t input_index) {
|
||||
auto input_node = common::AnfAlgo::VisitKernel(kernel_node->input(input_index + 1), 0).first;
|
||||
|
@ -326,8 +329,9 @@ void UpdateCustomKernelBuildInfo(const CNodePtr &kernel_node, bool is_akg_op) {
|
|||
GetOutputFormat(kernel_node, &output_formats);
|
||||
builder->SetOutputsDeviceType(output_types);
|
||||
builder->SetOutputsFormat(output_formats);
|
||||
// AnfAlgo::SetSelectKernelBuildInfo(builder->Build(), kernel_node.get());
|
||||
|
||||
if (op_name == kNameCustomAscend) {
|
||||
AnfAlgo::SetSelectKernelBuildInfo(builder->Build(), kernel_node.get());
|
||||
}
|
||||
// check reg info if kernel_attr is not null
|
||||
if (kernel_attr != nullptr) {
|
||||
std::vector<std::shared_ptr<KernelBuildInfo>> kernel_info_list;
|
||||
|
@ -465,6 +469,10 @@ std::pair<std::string, ExceptionType> SetKernelInfoWithMsg(const CNodePtr &kerne
|
|||
UpdateCustomKernelBuildInfo(kernel_node, true);
|
||||
return {};
|
||||
}
|
||||
if (tp == kCustomTypeAscend) {
|
||||
UpdateCustomKernelBuildInfo(kernel_node, false);
|
||||
return {};
|
||||
}
|
||||
// If Custom op has not set reg info, then infer info from inputs
|
||||
if (mindspore::kernel::OpLib::FindOp(op_name, kernel::OpImplyType::kCPU) == nullptr) {
|
||||
MS_LOG(WARNING) << "Not find operator information for Custom operator[" << op_name << "]. "
|
||||
|
@ -535,5 +543,49 @@ void SetKernelInfo(const CNodePtr &kernel_node) {
|
|||
if (msg.empty()) return;
|
||||
MS_EXCEPTION(etype) << msg;
|
||||
}
|
||||
|
||||
void CopyInputWeights(const CNodePtr &kernel_node, const std::vector<kernel::KernelTensorPtr> &inputs) {
|
||||
std::string kernel_name = common::AnfAlgo::GetCNodeName(kernel_node);
|
||||
if (kernel_name == kNameCustomAscend) {
|
||||
auto node_input_size = kernel_node->inputs().size();
|
||||
if (node_input_size < kCustomAscendInputNum) {
|
||||
MS_LOG(ERROR) << "Input num of custom ascend kernel should larger than " << (kCustomAscendInputNum - 1)
|
||||
<< ", real num is " << node_input_size;
|
||||
return;
|
||||
}
|
||||
if (node_input_size != inputs.size() + 1) {
|
||||
MS_LOG(ERROR) << "Input num of custom ascend kernel [" << node_input_size << "]"
|
||||
<< " is not equal to kernel tensor size[" << (inputs.size() + 1) << "].";
|
||||
return;
|
||||
}
|
||||
auto om_input = kernel_node->input(node_input_size - 1);
|
||||
if (!om_input->isa<Parameter>()) {
|
||||
MS_LOG(ERROR) << "Om input is not parameter.";
|
||||
return;
|
||||
}
|
||||
ParameterPtr om_param = om_input->cast<ParameterPtr>();
|
||||
if (om_param == nullptr || !om_param->has_default()) {
|
||||
MS_LOG(ERROR) << "Om param is invalid, val= " << om_param;
|
||||
return;
|
||||
}
|
||||
auto tensor = std::static_pointer_cast<tensor::Tensor>(om_param->default_param());
|
||||
if (tensor == nullptr) {
|
||||
MS_LOG(ERROR) << "Tensor is nullptr.";
|
||||
return;
|
||||
}
|
||||
if (tensor->data_c() == nullptr || tensor->Size() == 0) {
|
||||
MS_LOG(ERROR) << "Tensor data is invalid.";
|
||||
return;
|
||||
}
|
||||
auto new_addr = malloc(tensor->Size());
|
||||
if (new_addr == nullptr) {
|
||||
MS_LOG(ERROR) << "Malloc failed, size= " << tensor->Size();
|
||||
return;
|
||||
}
|
||||
memcpy(new_addr, tensor->data_c(), tensor->Size());
|
||||
kernel::AddressPtr addr_ptr = std::make_shared<kernel::Address>(new_addr, tensor->Size());
|
||||
inputs[inputs.size() - 1]->SetData(addr_ptr);
|
||||
}
|
||||
}
|
||||
} // namespace infer
|
||||
} // namespace mindspore
|
||||
|
|
|
@ -26,11 +26,13 @@
|
|||
#include "ir/anf.h"
|
||||
#include "ir/dtype/type.h"
|
||||
#include "include/common/utils/utils.h"
|
||||
#include "mindspore/ccsrc/kernel/kernel.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace infer {
|
||||
using DataType = std::pair<TypeId, std::string>;
|
||||
void SetKernelInfo(const CNodePtr &apply_kernel_ptr);
|
||||
void CopyInputWeights(const CNodePtr &kernel_node, const std::vector<kernel::KernelTensorPtr> &inputs);
|
||||
} // namespace infer
|
||||
} // namespace mindspore
|
||||
|
||||
|
|
|
@ -15,7 +15,11 @@
|
|||
*/
|
||||
|
||||
#include "tools/converter/adapter/acl/src/acl_model_process.h"
|
||||
#ifdef ENABLE_CLOUD_FUSION_INFERENCE
|
||||
#include "src/extendrt/kernel/ascend/model/acl_env_guard.h"
|
||||
#else
|
||||
#include "src/runtime/kernel/ascend/src/acl_env_guard.h"
|
||||
#endif
|
||||
#include "src/common/log_util.h"
|
||||
#include "acl/acl.h"
|
||||
#include "acl/acl_rt.h"
|
||||
|
|
|
@ -52,6 +52,8 @@ constexpr auto kInferShapePass = "InferShapePass";
|
|||
constexpr auto kConstFoldPass = "ConstFoldPass";
|
||||
constexpr auto kRemoveRedundantOpPass = "RemoveRedundantOpPass";
|
||||
constexpr auto kDelRedundantTranspose = "DeleteRedundantTranspose";
|
||||
constexpr auto kFuncType = "func_type";
|
||||
constexpr auto kUniqueName = "uniq_name";
|
||||
constexpr size_t kDependInputNum = 3;
|
||||
constexpr size_t kDependFirstInputIdx = 1;
|
||||
constexpr size_t kTupleGetItemFirstInputIdx = 1;
|
||||
|
@ -155,6 +157,10 @@ STATUS AclPassImpl::PreProcGraph(const FuncGraphPtr &func_graph) {
|
|||
}
|
||||
|
||||
STATUS AclPassImpl::PostProcGraph(const FuncGraphPtr &func_graph) {
|
||||
if (lite::acl::DelRedundantParameter(func_graph) != RET_SUCCESS) {
|
||||
MS_LOG(ERROR) << "Delete redundant parameters failed.";
|
||||
return lite::RET_ERROR;
|
||||
}
|
||||
if (!user_options_cfg_.offline) {
|
||||
MS_LOG(DEBUG) << "Online model infer no need to change to nhwc format.";
|
||||
return lite::RET_OK;
|
||||
|
@ -548,6 +554,8 @@ void AclPassImpl::SetCustomAttrs(const std::shared_ptr<ops::Custom> &prim) {
|
|||
std::vector<uint8_t> output_dim_char(output_dim_str.begin(), output_dim_str.end());
|
||||
std::map<std::string, std::vector<uint8_t>> attrs = {{lite::acl::kOutputShapes, output_dim_char}};
|
||||
prim->set_attr(attrs);
|
||||
prim->AddAttr(kFuncType, api::MakeValue<std::string>("acl_build"));
|
||||
prim->AddAttr(kUniqueName, api::MakeValue<std::string>("CustomAscend"));
|
||||
}
|
||||
|
||||
CNodePtr AclPassImpl::CreateCustomNode(const FuncGraphPtr &func_graph) {
|
||||
|
|
|
@ -136,6 +136,9 @@ STATUS DeleteRedundantTranspose::TransTransFusion(const FuncGraphPtr &func_graph
|
|||
if (!manager_->Replace(cnode, pre_cnode->input(1))) {
|
||||
MS_LOG(ERROR) << "replace old node failed, please check.";
|
||||
return lite::RET_ERROR;
|
||||
} else {
|
||||
func_graph->DropNode(cnode->input(kInputIndexTwo));
|
||||
func_graph->DropNode(pre_cnode->input(kInputIndexTwo));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue