!37274 support ascend cloud infer

Merge pull request !37274 from zhengyuanhua/br3
This commit is contained in:
i-robot 2022-07-05 11:25:24 +00:00 committed by Gitee
commit 69aa258eb2
No known key found for this signature in database
GPG Key ID: 173E9B9CA92EEF8F
27 changed files with 1939 additions and 14 deletions

View File

@ -84,7 +84,7 @@ std::string OpAdapterImpl::GetCustomOpType(const PrimitivePtr &prim) const {
MS_EXCEPTION_IF_NULL(prim);
auto value = prim->GetAttr("reg_op_name");
if (value == nullptr) {
MS_LOG(ERROR) << "Custom op has no func_type attr.";
MS_LOG(ERROR) << "Custom op has no reg_op_name attr.";
return "";
}
auto op_type = GetValue<std::string>(value);

View File

@ -1441,6 +1441,10 @@ bool MSANFModelParser::BuildAttrForFuncGraph(const FuncGraphPtr &outputFuncGraph
outputFuncGraph->set_attr(attr_proto.name(), ParseAttrInSingleScalar_int32_t_bool(attr_proto));
break;
}
case mind_ir::AttributeProto_AttributeType_INT32: {
outputFuncGraph->set_attr(attr_proto.name(), ParseAttrInSingleScalar_int32_t_int32_t(attr_proto));
break;
}
default:
MS_LOG(ERROR) << "Obtain attr for graph has not support input type: " << attr_type << "!";
return false;

View File

@ -202,6 +202,13 @@ if(DEFINED ENV{MSLITE_ENABLE_CLOUD_FUSION_INFERENCE})
set(MSLITE_ENABLE_CLOUD_FUSION_INFERENCE $ENV{MSLITE_ENABLE_CLOUD_FUSION_INFERENCE})
endif()
if(MSLITE_ENABLE_ACL AND MSLITE_ENABLE_CLOUD_FUSION_INFERENCE)
set(PLATFORM_ARM64 off)
set(PLATFORM_ARM32 off)
set(MSLITE_ENABLE_FP16 off)
set(ENABLE_NEON off)
endif()
if(MACHINE_LINUX_ARM64)
add_compile_definitions(MACHINE_LINUX_ARM64)
add_compile_definitions(LINUX_RUNTIME)

View File

@ -31,6 +31,12 @@ if(MSLITE_DEPS_OPENCV)
endif()
if(MSLITE_DEPS_MKLDNN)
if(CMAKE_SYSTEM_NAME MATCHES "Linux")
set(USE_MS_THREADPOOL_FOR_DNNL ON)
endif()
if(USE_MS_THREADPOOL_FOR_DNNL)
add_compile_definitions(USE_MS_THREADPOOL_FOR_DNNL)
endif()
include(${TOP_DIR}/cmake/external_libs/mkl_dnn.cmake)
endif()
@ -47,6 +53,7 @@ if(MSLITE_DEPS_PYBIND11)
include_directories(${Python3_NumPy_INCLUDE_DIRS})
include_directories(${TOP_DIR})
include_directories(${CORE_DIR})
set(PYBIND11_CPP_STANDARD -std=c++17)
include(${TOP_DIR}/cmake/external_libs/pybind11.cmake)
endif()
endif()

View File

@ -421,9 +421,13 @@ add_subdirectory(runtime/kernel/cpu)
add_library(lite_src_mid OBJECT ${LITE_SRC})
add_dependencies(lite_src_mid fbs_src)
if(MSLITE_ENABLE_ACL AND NOT MSLITE_ENABLE_CLOUD_FUSION_INFERENCE)
if(MSLITE_ENABLE_ACL)
include_directories(${TOP_DIR}/graphengine/inc/external)
if(NOT MSLITE_ENABLE_CLOUD_FUSION_INFERENCE)
add_subdirectory(runtime/kernel/ascend)
else()
add_compile_definitions(ENABLE_CLOUD_FUSION_INFERENCE)
endif()
link_directories(${ASCEND_CANN_RUNTIME_PATH} ${ASCEND_TOOLKIT_RUNTIME_PATH})
endif()

View File

@ -13,6 +13,8 @@ if(MSLITE_ENABLE_CLOUD_FUSION_INFERENCE)
add_compile_definitions(USE_GLOG)
string(REPLACE "-fno-rtti" "" CMAKE_C_FLAGS ${CMAKE_C_FLAGS})
string(REPLACE "-fno-rtti" "" CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS})
string(REPLACE "-fno-exceptions" "" CMAKE_C_FLAGS ${CMAKE_C_FLAGS})
string(REPLACE "-fno-exceptions" "" CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS})
add_compile_definitions(ENABLE_CLOUD_FUSION_INFERENCE)
remove_definitions(-DBUILD_LITE_INFERENCE)
set(MINDIR_MODEL_SRC
@ -165,7 +167,9 @@ if(MSLITE_ENABLE_CLOUD_FUSION_INFERENCE)
if(MSLITE_ENABLE_ACL)
include_directories(${TOP_DIR}/graphengine/inc/external)
add_subdirectory(kernel/ascend)
link_directories(${ASCEND_CANN_RUNTIME_PATH} ${ASCEND_TOOLKIT_RUNTIME_PATH})
target_link_libraries(mindspore-extendrt ascend_kernel_mid)
endif()
if(SUPPORT_CUDA)

View File

@ -0,0 +1,26 @@
# Build script for the Ascend (ACL) custom kernel object library `ascend_kernel_mid`.
include_directories(${TOP_DIR}/graphengine/inc/external)
include_directories(${TOP_DIR}/mindspore)
include_directories(${TOP_DIR}/mindspore/lite/src)
# Locate libgraph.so in the CANN / toolkit runtime directories.
# NOTE(review): the same lookup is repeated inside the SD3403 branch below.
find_library(ge_graph libgraph.so ${ASCEND_CANN_RUNTIME_PATH} ${ASCEND_TOOLKIT_RUNTIME_PATH})
# Collect the kernel entry source plus everything under model/.
# NOTE(review): the bare directory is passed as the first globbing expression here;
# confirm that is intended rather than a RELATIVE/base-path argument.
file(GLOB_RECURSE ASCEND_SRC ${CMAKE_CURRENT_SOURCE_DIR}
"custom_ascend_kernel.cc"
"model/*.cc"
)
add_library(ascend_kernel_mid OBJECT ${ASCEND_SRC})
# The object library is built only after the fbs_inner_src target.
add_dependencies(ascend_kernel_mid fbs_inner_src)
# SD3403 on ARM64 links a reduced set of ACL libraries that are looked up right here.
if("${MSLITE_REGISTRY_DEVICE}" STREQUAL "SD3403" AND PLATFORM_ARM64)
find_library(ge_graph libgraph.so ${ASCEND_CANN_RUNTIME_PATH} ${ASCEND_TOOLKIT_RUNTIME_PATH})
find_library(acl libascendcl.so ${ASCEND_CANN_RUNTIME_PATH} ${ASCEND_TOOLKIT_RUNTIME_PATH})
find_library(acl_retr libacl_retr.so ${ASCEND_CANN_RUNTIME_PATH} ${ASCEND_TOOLKIT_RUNTIME_PATH})
find_library(acl_cblas libacl_cblas.so ${ASCEND_CANN_RUNTIME_PATH} ${ASCEND_TOOLKIT_RUNTIME_PATH})
find_library(acl_runtime libruntime.so ${ASCEND_CANN_RUNTIME_PATH} ${ASCEND_TOOLKIT_RUNTIME_PATH})
target_link_libraries(ascend_kernel_mid ${ge_graph} ${acl} ${acl_retr} ${acl_cblas} ${acl_runtime})
else()
# NOTE(review): apart from ge_graph, none of the variables below are set in this file;
# presumably they come from a parent scope - confirm, otherwise they expand to nothing.
target_link_libraries(ascend_kernel_mid ${ge_graph} ${ge_compiler}
${acl_retr} ${acl_cblas} ${acl_dvpp} ${acl_runtime} ${libplatform}
${libcompress} ${libopskernel} ${libaicore_utils} ${libaicpu_engine_common} ${acl})
endif()

View File

@ -0,0 +1,242 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "extendrt/kernel/ascend/custom_ascend_kernel.h"
#include <utility>
#include "include/registry/register_kernel.h"
#include "include/api/types.h"
#include "include/api/data_type.h"
#include "extendrt/kernel/ascend/model/model_infer.h"
#include "extendrt/kernel/ascend/options/acl_options_parser.h"
#include "core/ops/custom.h"
#include "plugin/factory/ms_factory.h"
#include "src/common/log_util.h"
#include "common/log_adapter.h"
namespace mindspore::kernel {
namespace acl {
// Construct an idle kernel: no model loaded, no helper objects created, data index 0.
// The helpers are created later by InitParam().
CustomAscendKernelMod::CustomAscendKernelMod()
: load_model_(false), acl_options_(nullptr), dyn_shape_proc_(nullptr), model_infer_(nullptr), input_data_idx_(0) {}
// Finalize the model inference object, but only if Init() actually loaded a model.
CustomAscendKernelMod::~CustomAscendKernelMod() {
  if (!load_model_) {
    return;
  }
  // A failure here can only be reported; a destructor has no way to propagate it.
  if (model_infer_->Finalize() != lite::RET_OK) {
    MS_LOG(ERROR) << "Model finalize failed.";
  }
}
void CustomAscendKernelMod::RecordInputDataIndex(const std::vector<KernelTensorPtr> &inputs) {
for (size_t idx = 0; idx < inputs.size(); ++idx) {
if (inputs[idx] == nullptr) {
MS_LOG(ERROR) << "Input " << idx << " is invalid.";
return;
}
if (inputs[idx]->GetData() == nullptr) {
input_data_idx_ = idx;
break;
}
}
}
// Store the kernel inputs/outputs and create the helper objects used at launch time.
//
// The last element of `inputs` is treated as the om model blob; all preceding elements
// are kept in inputs_ as the real model inputs. Returns false (after logging) when the
// arguments are empty or the om tensor carries no data.
bool CustomAscendKernelMod::InitParam(const std::vector<KernelTensorPtr> &inputs,
                                      const std::vector<KernelTensorPtr> &outputs) {
  if (inputs.empty() || outputs.empty()) {
    MS_LOG(ERROR) << "Custom kernel has empty inputs or outputs, which is invalid.";
    return false;
  }

  // Everything but the trailing om tensor is a model input.
  inputs_.assign(inputs.begin(), inputs.end() - 1);
  outputs_.assign(outputs.begin(), outputs.end());

  acl_options_ = std::make_shared<AclModelOptions>();
  if (acl_options_ == nullptr) {
    MS_LOG(ERROR) << "Create AclModelOptions failed.";
    return false;
  }
  // Parsing of acl options from the context is currently disabled, so the options stay default-constructed.

  // The last input is the om data tensor.
  int idx = inputs.size() - 1;
  const auto &om_tensor = inputs[idx];
  if (om_tensor == nullptr || om_tensor->GetData() == nullptr) {
    MS_LOG(ERROR) << "Input " << idx << " is invalid.";
    return false;
  }
  const auto om_addr = om_tensor->GetData();
  Buffer om_data(om_addr->addr, om_addr->size);

  model_infer_ = std::make_shared<ModelInfer>(om_data, acl_options_);
  if (model_infer_ == nullptr) {
    MS_LOG(ERROR) << "Create ModelInfer failed.";
    return false;
  }

  RecordInputDataIndex(inputs);
  dyn_shape_proc_ = std::make_shared<DynShapeProcess>(acl_options_, input_data_idx_);
  if (dyn_shape_proc_ == nullptr) {
    MS_LOG(ERROR) << "Create DynShapeProcess failed.";
    return false;
  }
  return true;
}
// Initialize the kernel: check that the operator is an ops::Custom, record the
// inputs/outputs and load the om model.
//
// Returns true on success, and also when the model has already been loaded by an
// earlier call; returns false (after logging) on any failure.
bool CustomAscendKernelMod::Init(const BaseOperatorPtr &base_operator, const std::vector<KernelTensorPtr> &inputs,
                                 const std::vector<KernelTensorPtr> &outputs) {
  if (load_model_) {
    MS_LOG(INFO) << "Om has been loaded in custom kernel.";
    // This function returns bool: returning the status code lite::RET_OK (0) here
    // would be converted to false and report the already-loaded case as a failure.
    return true;
  }
  auto kernel_ptr = std::dynamic_pointer_cast<ops::Custom>(base_operator);
  if (!kernel_ptr) {
    MS_LOG(ERROR) << "Cast Custom ops failed!";
    return false;
  }
  if (!InitParam(inputs, outputs)) {
    MS_LOG(ERROR) << "Init param failed.";
    return false;
  }
  if (LoadModel() != lite::RET_OK) {
    MS_LOG(ERROR) << "Load model failed.";
    return false;
  }
  load_model_ = true;
  return true;
}
// Initialize the inference object, load the om data, then copy the dynamic batch /
// image sizes reported by the loaded model into the shared acl options.
// Returns lite::RET_OK on success, lite::RET_ERROR otherwise.
int CustomAscendKernelMod::LoadModel() {
  if (model_infer_->Init() != lite::RET_OK) {
    MS_LOG(ERROR) << "Model infer init failed.";
    return lite::RET_ERROR;
  }
  if (model_infer_->Load() != lite::RET_OK) {
    MS_LOG(ERROR) << "Load om data failed.";
    return lite::RET_ERROR;
  }
  // These are queried only after a successful Load().
  acl_options_->batch_size = model_infer_->GetDynamicBatch();
  acl_options_->image_size = model_infer_->GetDynamicImage();
  MS_LOG(INFO) << "Load om data success.";
  return lite::RET_OK;
}
// Resize hook. The only work done here is a lazy Init() when the model has not been
// loaded yet; `inputsOnHost` is not used.
// Returns lite::RET_OK, or lite::RET_ERROR if the lazy Init() fails.
int CustomAscendKernelMod::Resize(const BaseOperatorPtr &base_operator, const std::vector<KernelTensorPtr> &inputs,
                                  const std::vector<KernelTensorPtr> &outputs,
                                  const std::map<uint32_t, tensor::TensorPtr> &inputsOnHost) {
  if (load_model_) {
    return lite::RET_OK;
  }
  MS_LOG(WARNING) << "Model has not been loaded, start to load when resize.";
  const bool init_ok = Init(base_operator, inputs, outputs);
  if (!init_ok) {
    MS_LOG(ERROR) << "Load model failed when resize.";
    return lite::RET_ERROR;
  }
  return lite::RET_OK;
}
// Bind the launch-time addresses to the tensors recorded by InitParam().
//
// `inputs` must hold one more element than inputs_ (the trailing om tensor, which is
// not rebound here) and `outputs` must match outputs_ in size. Every bound address
// must be non-null with a non-zero size.
// Returns lite::RET_OK on success, lite::RET_ERROR (after logging) otherwise.
int CustomAscendKernelMod::SetInputAndOutputAddr(const std::vector<AddressPtr> &inputs,
                                                 const std::vector<AddressPtr> &outputs) {
  if ((inputs_.size() + 1) != inputs.size()) {
    MS_LOG(ERROR) << "Size of inputs in init [" << (inputs_.size() + 1) << "] and "
                  << "size of inputs in launch [" << inputs.size() << "] are not equal.";
    return lite::RET_ERROR;
  }
  if (outputs_.size() != outputs.size()) {
    MS_LOG(ERROR) << "Size of outputs in init (" << outputs_.size() << ") and "
                  << "size of outputs in launch (" << outputs.size() << ") are not equal.";
    return lite::RET_ERROR;
  }
  for (size_t i = 0; i < inputs_.size(); ++i) {
    // Guard the pointers themselves before dereferencing them.
    if (inputs_[i] == nullptr || inputs[i] == nullptr || inputs[i]->addr == nullptr || inputs[i]->size == 0) {
      MS_LOG(ERROR) << "Input " << i << " addr is invalid.";
      return lite::RET_ERROR;
    }
    inputs_[i]->SetData(inputs[i]);
  }
  for (size_t j = 0; j < outputs_.size(); ++j) {
    // The size that matters is the output's own size; indexing `inputs` with an
    // output index validates the wrong buffer and may read out of range.
    if (outputs_[j] == nullptr || outputs[j] == nullptr || outputs[j]->addr == nullptr || outputs[j]->size == 0) {
      MS_LOG(ERROR) << "Output " << j << " addr is invalid.";
      return lite::RET_ERROR;
    }
    outputs_[j]->SetData(outputs[j]);
  }
  return lite::RET_OK;
}
bool CustomAscendKernelMod::Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs, void *stream_ptr) {
if (!load_model_) {
MS_LOG(ERROR) << "Init custom ascend kernel has been not ready.";
return false;
}
if (SetInputAndOutputAddr(inputs, outputs) != lite::RET_OK) {
MS_LOG(ERROR) << "Check input and output param failed.";
return false;
}
if (dyn_shape_proc_->ProcDynamicInput(&inputs_) != lite::RET_OK) {
MS_LOG(ERROR) << "Proc dynamic batch size input failed.";
return false;
}
if (model_infer_->Inference(inputs_, outputs_) != lite::RET_OK) {
MS_LOG(ERROR) << "Custom kernel execute failed.";
return false;
}
return true;
}
// std::shared_ptr<kernel::Kernel> CustomCreateKernel(const std::vector<mindspore::MSTensor> &inputs,
// const std::vector<mindspore::MSTensor> &outputs,
// const schema::Primitive *primitive, const mindspore::Context *ctx)
// {
// if (primitive == nullptr) {
// MS_LOG(ERROR) << "Primitive is nullptr.";
// return nullptr;
// }
// if (primitive->value_type() != schema::PrimitiveType_Custom) {
// MS_LOG(ERROR) << "Primitive type is not PrimitiveType_Custom";
// return nullptr;
// }
//
// auto kernel = std::make_shared<CustomAscendKernel>(inputs, outputs, primitive, ctx);
// if (kernel == nullptr) {
// MS_LOG(ERROR) << "New custom kernel is nullptr";
// return nullptr;
// }
// return kernel;
// }
MS_KERNEL_FACTORY_REG(KernelMod, CustomAscend, CustomAscendKernelMod);
} // namespace acl
} // namespace mindspore::kernel
namespace mindspore {
namespace registry {
// Short aliases for the DataType enumerators.
// NOTE(review): within this file they are referenced only by the commented-out
// REGISTER_CUSTOM_KERNEL lines that follow - confirm whether they are still needed.
namespace {
const auto kFloat32 = DataType::kNumberTypeFloat32;
const auto kFloat16 = DataType::kNumberTypeFloat16;
const auto kInt32 = DataType::kNumberTypeInt32;
const auto kInt8 = DataType::kNumberTypeInt8;
const auto kUInt8 = DataType::kNumberTypeUInt8;
const auto kBool = DataType::kNumberTypeBool;
} // namespace
// REGISTER_CUSTOM_KERNEL(ASCEND, ACL, kFloat32, ACL, kernel::acl::CustomCreateKernel)
// REGISTER_CUSTOM_KERNEL(ASCEND, ACL, kFloat16, ACL, kernel::acl::CustomCreateKernel)
// REGISTER_CUSTOM_KERNEL(ASCEND, ACL, kInt32, ACL, kernel::acl::CustomCreateKernel)
// REGISTER_CUSTOM_KERNEL(ASCEND, ACL, kInt8, ACL, kernel::acl::CustomCreateKernel)
// REGISTER_CUSTOM_KERNEL(ASCEND, ACL, kUInt8, ACL, kernel::acl::CustomCreateKernel)
// REGISTER_CUSTOM_KERNEL(ASCEND, ACL, kBool, ACL, kernel::acl::CustomCreateKernel)
} // namespace registry
} // namespace mindspore

View File

@ -0,0 +1,68 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_LITE_SRC_EXTENDRT_KERNEL_ASCEND_CUSTOM_ASCEND_KERNEL_H_
#define MINDSPORE_LITE_SRC_EXTENDRT_KERNEL_ASCEND_CUSTOM_ASCEND_KERNEL_H_
#include <vector>
#include <string>
#include <memory>
#include <map>
#include "extendrt/kernel/ascend/options/acl_model_options.h"
#include "extendrt/kernel/ascend/model/model_infer.h"
#include "extendrt/kernel/ascend/model/dyn_shape_process.h"
#include "include/api/types.h"
#include "include/api/context.h"
#include "kernel/kernel.h"
#include "kernel/common_utils.h"
#include "include/errorcode.h"
namespace mindspore::kernel {
namespace acl {
// KernelMod implementation that runs an om model through the acl ModelInfer helper.
// The om model blob is expected as the last input tensor handed to Init().
class CustomAscendKernelMod : public kernel::KernelMod {
public:
CustomAscendKernelMod();
// Finalizes the model inference object if a model was loaded.
~CustomAscendKernelMod() override;
// Records inputs/outputs and loads the om model; returns true on success.
bool Init(const BaseOperatorPtr &base_operator, const std::vector<KernelTensorPtr> &inputs,
const std::vector<KernelTensorPtr> &outputs) override;
// Loads the model via Init() if that has not happened yet; returns a lite status code.
int Resize(
const BaseOperatorPtr &base_operator, const std::vector<KernelTensorPtr> &inputs,
const std::vector<KernelTensorPtr> &outputs,
const std::map<uint32_t, tensor::TensorPtr> &inputsOnHost = std::map<uint32_t, tensor::TensorPtr>()) override;
// Binds the launch addresses and runs one inference; returns true on success.
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs, void *stream_ptr) override;
private:
// Stores the index of the first input tensor without data in input_data_idx_.
void RecordInputDataIndex(const std::vector<KernelTensorPtr> &inputs);
// Fills inputs_/outputs_ and creates acl_options_, model_infer_ and dyn_shape_proc_.
bool InitParam(const std::vector<KernelTensorPtr> &inputs, const std::vector<KernelTensorPtr> &outputs);
// Validates the launch addresses and attaches them to inputs_/outputs_.
int SetInputAndOutputAddr(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &outputs);
// Initializes model_infer_, loads the om data and reads back the dynamic sizes.
int LoadModel();
// True once Init() has completed successfully.
bool load_model_;
// Model inputs (all Init() inputs except the trailing om tensor).
std::vector<KernelTensorPtr> inputs_;
std::vector<KernelTensorPtr> outputs_;
AclModelOptionsPtr acl_options_;
DynShapeProcPtr dyn_shape_proc_;
ModelInferPtr model_infer_;
// Index handed to DynShapeProcess to pick the tensor whose shape is inspected.
size_t input_data_idx_;
};
} // namespace acl
} // namespace mindspore::kernel
#endif // MINDSPORE_LITE_SRC_EXTENDRT_KERNEL_ASCEND_CUSTOM_ASCEND_KERNEL_H_

View File

@ -0,0 +1,60 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "extendrt/kernel/ascend/model/acl_env_guard.h"
#include "common/log_adapter.h"
#include "acl/acl.h"
namespace mindspore::kernel {
namespace acl {
// Shared acl environment handed out by GetAclEnv(), and the mutex that guards it.
std::shared_ptr<AclEnvGuard> AclEnvGuard::global_acl_env_ = nullptr;
std::mutex AclEnvGuard::global_acl_env_mutex_;
// Call aclInit with the given config file and keep its result in errno_.
// A repeated initialization is treated as success.
// NOTE(review): cfg_file.data() is passed straight to a C API; this assumes the view
// refers to a null-terminated string - confirm against callers.
AclEnvGuard::AclEnvGuard(std::string_view cfg_file) : errno_(aclInit(cfg_file.data())) {
  const bool init_ok = (errno_ == ACL_ERROR_NONE) || (errno_ == ACL_ERROR_REPEAT_INITIALIZE);
  if (init_ok) {
    MS_LOG(INFO) << "Acl init success";
  } else {
    MS_LOG(ERROR) << "Execute aclInit Failed";
  }
}
// Finalize acl; the return code is deliberately discarded.
// NOTE(review): this runs even when aclInit in the constructor failed - confirm that is intended.
AclEnvGuard::~AclEnvGuard() { (void)aclFinalize(); }
std::shared_ptr<AclEnvGuard> AclEnvGuard::GetAclEnv(std::string_view cfg_file) {
std::shared_ptr<AclEnvGuard> acl_env;
std::lock_guard<std::mutex> lock(global_acl_env_mutex_);
acl_env = global_acl_env_;
if (acl_env != nullptr) {
MS_LOG(INFO) << "Acl has been initialized, skip.";
if (!cfg_file.empty()) {
MS_LOG(WARNING) << "Dump config file option " << cfg_file << " is ignored.";
}
} else {
acl_env = std::make_shared<AclEnvGuard>(cfg_file);
aclError ret = acl_env->GetErrno();
if (ret != ACL_ERROR_NONE && ret != ACL_ERROR_REPEAT_INITIALIZE) {
MS_LOG(ERROR) << "Execute aclInit Failed";
return nullptr;
}
global_acl_env_ = acl_env;
MS_LOG(INFO) << "Acl init success";
}
return acl_env;
}
} // namespace acl
} // namespace mindspore::kernel

View File

@ -0,0 +1,42 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_LITE_SRC_EXTENDRT_KERNEL_ASCEND_MODEL_ACL_ENV_GUARD_H_
#define MINDSPORE_LITE_SRC_EXTENDRT_KERNEL_ASCEND_MODEL_ACL_ENV_GUARD_H_
#include <memory>
#include <mutex>
#include "acl/acl_base.h"
namespace mindspore::kernel {
namespace acl {
// Guard object around aclInit / aclFinalize: the constructor initializes acl and the
// destructor finalizes it. A shared instance is available through GetAclEnv().
// NOTE(review): std::string_view is used below but <string_view> is not included by
// this header - presumably it arrives transitively; confirm.
class AclEnvGuard {
public:
// Calls aclInit with `cfg_file` and stores the result (see GetErrno()).
explicit AclEnvGuard(std::string_view cfg_file);
~AclEnvGuard();
// Result code of the aclInit call made by the constructor.
aclError GetErrno() const { return errno_; }
// Returns the shared guard, creating it on first use; nullptr if initialization fails.
static std::shared_ptr<AclEnvGuard> GetAclEnv(std::string_view cfg_file);
private:
static std::shared_ptr<AclEnvGuard> global_acl_env_;
// Serializes access to global_acl_env_ inside GetAclEnv().
static std::mutex global_acl_env_mutex_;
aclError errno_;
};
} // namespace acl
} // namespace mindspore::kernel
#endif // MINDSPORE_LITE_SRC_EXTENDRT_KERNEL_ASCEND_MODEL_ACL_ENV_GUARD_H_

View File

@ -0,0 +1,179 @@
/**
* Copyright 2021-2022 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "extendrt/kernel/ascend/model/dyn_shape_process.h"
#include <utility>
#include "mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/op_base.h"
#include "include/errorcode.h"
namespace mindspore::kernel {
namespace acl {
// Shape-layout constants used when reading the image height/width of an input tensor.
namespace {
// Required number of dimensions of the inspected input shape.
constexpr auto kInputDimNum = 4;
// Positions of height/width inside an NHWC shape.
constexpr auto kNHWCHeightIdx = 1;
constexpr auto kNHWCWidthIdx = 2;
// Positions of height/width for every other format (treated as NCHW).
constexpr auto kNCHWHeightIdx = 2;
constexpr auto kNCHWWidthIdx = 3;
// Number of values (height, width) stored in the image-size input.
constexpr auto kImageSizeHwNum = 2;
} // namespace
// Append the extra dynamic-shape input (batch size or image size) to `inputs`.
//
// Nothing is appended when neither option is configured. Configuring both at once is
// an error. Returns lite::RET_OK on success and lite::RET_ERROR otherwise.
int DynShapeProcess::ProcDynamicInput(std::vector<KernelTensorPtr> *const inputs) {
  MS_CHECK_TRUE_MSG(acl_options_ != nullptr, lite::RET_ERROR, "Acl options ptr is nullptr.");
  const bool has_batch = !acl_options_->batch_size.empty();
  const bool has_image = !acl_options_->image_size.empty();
  if (!has_batch && !has_image) {
    MS_LOG(INFO) << "Inputs are not dynamic mode.";
    return lite::RET_OK;
  }
  if (has_batch && has_image) {
    MS_LOG(ERROR) << "Batch size and image size can't be set at the same time.";
    return lite::RET_ERROR;
  }
  MS_CHECK_TRUE_MSG(inputs != nullptr, lite::RET_ERROR, "Inputs is nullptr.");

  // Exactly one of the two modes is active from here on.
  if (has_batch) {
    if (AddBatchSizeInput(inputs) != lite::RET_OK) {
      MS_LOG(ERROR) << "Add batch size input failed.";
      return lite::RET_ERROR;
    }
    return lite::RET_OK;
  }
  if (AddImageSizeInput(inputs) != lite::RET_OK) {
    MS_LOG(ERROR) << "Add Image size input failed.";
    return lite::RET_ERROR;
  }
  return lite::RET_OK;
}
int DynShapeProcess::AddBatchSizeInput(std::vector<KernelTensorPtr> *const inputs) {
int32_t *batch_size_addr = reinterpret_cast<int32_t *>(malloc(sizeof(int32_t)));
if (batch_size_addr == nullptr) {
MS_LOG(ERROR) << "Malloc batch size failed.";
return lite::RET_ERROR;
}
if (GetRealBatchSize(inputs, batch_size_addr) != lite::RET_OK) {
MS_LOG(ERROR) << "Get real batch size failed.";
free(batch_size_addr);
return lite::RET_ERROR;
}
auto batch_size_ptr = std::make_shared<Address>(batch_size_addr, sizeof(int32_t));
if (batch_size_ptr == nullptr) {
MS_LOG(ERROR) << "Create Address failed.";
free(batch_size_addr);
return lite::RET_ERROR;
}
auto tensor_ptr = std::make_shared<KernelTensor>();
if (tensor_ptr == nullptr) {
MS_LOG(ERROR) << "Create KernelTensor failed.";
free(batch_size_addr);
return lite::RET_ERROR;
}
tensor_ptr->SetData(batch_size_ptr);
inputs->emplace_back(tensor_ptr);
return lite::RET_OK;
}
int DynShapeProcess::AddImageSizeInput(std::vector<KernelTensorPtr> *const inputs) {
int32_t *image_size_addr = reinterpret_cast<int32_t *>(malloc(kImageSizeHwNum * sizeof(int32_t)));
if (image_size_addr == nullptr) {
MS_LOG(ERROR) << "Malloc image size failed.";
return lite::RET_ERROR;
}
if (GetRealImageSize(inputs, image_size_addr, kImageSizeHwNum) != lite::RET_OK) {
MS_LOG(ERROR) << "Get real image size failed.";
free(image_size_addr);
return lite::RET_ERROR;
}
auto image_size_ptr = std::make_shared<Address>(image_size_addr, kImageSizeHwNum * sizeof(int32_t));
if (image_size_ptr == nullptr) {
MS_LOG(ERROR) << "Create Address failed.";
free(image_size_addr);
return lite::RET_ERROR;
}
auto tensor_ptr = std::make_shared<KernelTensor>();
if (tensor_ptr == nullptr) {
MS_LOG(ERROR) << "Create KernelTensor failed.";
free(image_size_addr);
return lite::RET_ERROR;
}
tensor_ptr->SetData(image_size_ptr);
inputs->emplace_back(tensor_ptr);
return lite::RET_OK;
}
int DynShapeProcess::GetRealBatchSize(std::vector<KernelTensorPtr> *const inputs, int32_t *batch_size) {
MS_CHECK_TRUE_MSG(batch_size != nullptr, lite::RET_ERROR, "Batch size ptr is nullptr.");
if (input_data_idx_ >= inputs->size()) {
MS_LOG(ERROR) << " Input data index " << input_data_idx_ << " is larger than input size " << inputs->size();
return lite::RET_ERROR;
}
auto tensor = (*inputs)[input_data_idx_];
std::vector<int64_t> shape = tensor->GetShapeVector();
if (shape.empty()) {
MS_LOG(ERROR) << "Shape is empty, input index = " << input_data_idx_;
return lite::RET_ERROR;
}
int32_t cur_batch_size = static_cast<uint64_t>(shape[0]);
auto iter = acl_options_->batch_size.find(cur_batch_size);
if (iter == acl_options_->batch_size.end()) {
MS_LOG(ERROR) << "Current batch size " << cur_batch_size << " is invalid, please check device info of context";
return lite::RET_ERROR;
}
*batch_size = cur_batch_size;
MS_LOG(DEBUG) << "Current batch size " << cur_batch_size;
return lite::RET_OK;
}
int DynShapeProcess::GetRealImageSize(std::vector<KernelTensorPtr> *const inputs, int32_t *image_size, int32_t num) {
MS_CHECK_TRUE_MSG(image_size != nullptr, lite::RET_ERROR, "Image size ptr is nullptr.");
if (input_data_idx_ >= inputs->size()) {
MS_LOG(ERROR) << "Input data index " << input_data_idx_ << " is larger than input size " << inputs->size();
return lite::RET_ERROR;
}
auto tensor = (*inputs)[input_data_idx_];
std::vector<int64_t> shape = tensor->GetShapeVector();
if (shape.size() != kInputDimNum) {
MS_LOG(ERROR) << "Shape size " << shape.size() << " is invalid, input index = " << input_data_idx_;
return lite::RET_ERROR;
}
auto format = tensor->GetFormat();
uint64_t height;
uint64_t width;
if (format == mindspore::Format::NHWC) {
height = shape[kNHWCHeightIdx];
width = shape[kNHWCWidthIdx];
} else {
height = shape[kNCHWHeightIdx];
width = shape[kNCHWWidthIdx];
}
auto cur_image_size = std::pair<int32_t, int32_t>(static_cast<uint64_t>(height), static_cast<uint64_t>(width));
auto iter = acl_options_->image_size.find(cur_image_size);
if (iter == acl_options_->image_size.end()) {
MS_LOG(ERROR) << "Image size height " << height << ",weight " << width
<< " is invalid, please check device info of context.";
return lite::RET_ERROR;
}
if (num != kImageSizeHwNum) {
MS_LOG(ERROR) << "The hw num should be " << kImageSizeHwNum << ",real num " << num;
return lite::RET_ERROR;
}
image_size[0] = height;
image_size[1] = width;
MS_LOG(DEBUG) << "Current height " << height << " width " << width;
return lite::RET_OK;
}
} // namespace acl
} // namespace mindspore::kernel

View File

@ -0,0 +1,48 @@
/**
* Copyright 2021-2022 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_LITE_SRC_EXTENDRT_KERNEL_ASCEND_MODEL_DYN_SHAPE_PROCESS_H
#define MINDSPORE_LITE_SRC_EXTENDRT_KERNEL_ASCEND_MODEL_DYN_SHAPE_PROCESS_H
#include <vector>
#include <memory>
#include "extendrt/kernel/ascend/options/acl_model_options.h"
#include "kernel/kernel.h"
#include "include/api/types.h"
namespace mindspore::kernel {
namespace acl {
// Helper that appends the extra dynamic-shape input (current batch size or current
// image size) to a kernel's input list, based on the sizes stored in the acl options.
class DynShapeProcess {
public:
// `options` supplies the allowed batch/image sizes; `input_data_idx` selects the input
// tensor whose shape is inspected.
explicit DynShapeProcess(const AclModelOptionsPtr &options, size_t input_data_idx)
: acl_options_(options), input_data_idx_(input_data_idx) {}
// Appends the dynamic-shape input when a dynamic mode is configured; returns a lite status code.
int ProcDynamicInput(std::vector<KernelTensorPtr> *const inputs);
private:
// Append a tensor holding the batch size / the image size to `inputs`.
int AddBatchSizeInput(std::vector<KernelTensorPtr> *const inputs);
int AddImageSizeInput(std::vector<KernelTensorPtr> *const inputs);
// Read the current batch size / image size from the selected input and validate it
// against the configured values.
int GetRealBatchSize(std::vector<KernelTensorPtr> *const inputs, int32_t *batch_size);
int GetRealImageSize(std::vector<KernelTensorPtr> *const inputs, int32_t *image_size, int32_t num);
AclModelOptionsPtr acl_options_;
size_t input_data_idx_;
};
using DynShapeProcPtr = std::shared_ptr<DynShapeProcess>;
} // namespace acl
} // namespace mindspore::kernel
#endif // MINDSPORE_LITE_SRC_EXTENDRT_KERNEL_ASCEND_MODEL_DYN_SHAPE_PROCESS_H

View File

@ -0,0 +1,170 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "extendrt/kernel/ascend/model/model_infer.h"
#include "common/log_adapter.h"
#include "acl/acl.h"
namespace mindspore::kernel {
namespace acl {
// Store the om model data and options; no acl call is made here. The acl environment,
// device and context are set up later by Init().
ModelInfer::ModelInfer(const Buffer &om_data, const AclModelOptionsPtr &options)
: init_flag_(false),
load_flag_(false),
device_type_("AscendCL"),
context_(nullptr),
om_data_(om_data),
options_(options),
model_process_(options),
acl_env_(nullptr) {}
// Set up the acl runtime for this model: acquire the shared acl environment, open the
// configured device, create a context and query the run mode.
//
// Calling Init() again after a successful call is a no-op. On failure every resource
// acquired so far in this call (context, device) is released again, because Finalize()
// skips cleanup while init_flag_ is false.
// Returns lite::RET_OK on success, lite::RET_ERROR otherwise.
STATUS ModelInfer::Init() {
  if (init_flag_) {
    MS_LOG(INFO) << "Acl has been initialized, skip.";
    return lite::RET_OK;
  }
  if (options_ == nullptr) {
    MS_LOG(ERROR) << "Acl options is nullptr.";
    return lite::RET_ERROR;
  }
  acl_env_ = AclEnvGuard::GetAclEnv(options_->dump_cfg_path);
  if (acl_env_ == nullptr) {
    MS_LOG(ERROR) << "Acl init failed.";
    return lite::RET_ERROR;
  }
  int32_t device_id = options_->device_id;
  aclError ret = aclrtSetDevice(device_id);
  if (ret != ACL_ERROR_NONE) {
    MS_LOG(ERROR) << "Acl open device " << device_id << " failed.";
    return lite::RET_ERROR;
  }
  MS_LOG(INFO) << "Open device " << device_id << " success.";
  ret = aclrtCreateContext(&context_, device_id);
  if (ret != ACL_ERROR_NONE) {
    MS_LOG(ERROR) << "Acl create context failed.";
    // Undo aclrtSetDevice so the device is not left opened.
    (void)aclrtResetDevice(device_id);
    return lite::RET_ERROR;
  }
  MS_LOG(INFO) << "Create context success.";
  aclrtRunMode run_mode;
  ret = aclrtGetRunMode(&run_mode);
  if (ret != ACL_ERROR_NONE) {
    MS_LOG(ERROR) << "Acl get run mode failed.";
    // Release the context and the device acquired above.
    (void)aclrtDestroyContext(context_);
    context_ = nullptr;
    (void)aclrtResetDevice(device_id);
    return lite::RET_ERROR;
  }
  bool is_device = (run_mode == ACL_DEVICE);
  model_process_.SetIsDevice(is_device);
  MS_LOG(INFO) << "Get run mode success is device input/output " << is_device;
  MS_LOG(INFO) << "Init model success, device id " << device_id;
  init_flag_ = true;
  return lite::RET_OK;
}
// Tear down what Init()/Load() set up: unload the model, destroy the context and
// reset the device.
//
// Returns lite::RET_OK when nothing was initialized or the teardown ran to the end.
// Returns early with an error if the context cannot be made current or the model
// cannot be unloaded. Failures to destroy the context or reset the device are only logged.
STATUS ModelInfer::Finalize() {
  if (!init_flag_) {
    MS_LOG(WARNING) << "Init is not ok, no need to finalize.";
    return lite::RET_OK;
  }
  if (aclrtSetCurrentContext(context_) != ACL_ERROR_NONE) {
    MS_LOG(ERROR) << "Set the ascend device context failed.";
    return lite::RET_ERROR;
  }
  if (load_flag_) {
    const auto unload_ret = model_process_.UnLoad();
    if (unload_ret != lite::RET_OK) {
      MS_LOG(ERROR) << "Unload model inner failed.";
      return unload_ret;
    }
  }
  if (context_ != nullptr) {
    if (aclrtDestroyContext(context_) != ACL_ERROR_NONE) {
      MS_LOG(ERROR) << "Destroy context failed.";
    }
    context_ = nullptr;
  }
  MS_LOG(INFO) << "End to destroy context.";
  if (aclrtResetDevice(options_->device_id) != ACL_ERROR_NONE) {
    MS_LOG(ERROR) << "Reset device " << options_->device_id << " failed.";
  }
  MS_LOG(INFO) << "End to reset device " << options_->device_id;
  init_flag_ = false;
  load_flag_ = false;
  return lite::RET_OK;
}
// Load the om model on first use, then make this object's context current.
// Returns lite::RET_OK on success; otherwise the load error or lite::RET_ERROR.
STATUS ModelInfer::Load() {
  const bool need_load = !load_flag_;
  if (need_load) {
    const int load_ret = LoadAclModel(om_data_);
    if (load_ret != lite::RET_OK) {
      MS_LOG(ERROR) << "Load model model failed.";
      return load_ret;
    }
    load_flag_ = true;
  }
  // The context is (re)selected on every call, including calls after the first load.
  const aclError ctx_ret = aclrtSetCurrentContext(context_);
  if (ctx_ret != ACL_ERROR_NONE) {
    MS_LOG(ERROR) << "Set the ascend device context failed, ret = " << ctx_ret;
    return lite::RET_ERROR;
  }
  return lite::RET_OK;
}
// Load the om model from memory and pre-initialize the model resources.
// If the pre-initialization fails the freshly loaded model is unloaded again.
// Returns lite::RET_OK on success; lite::RET_ERROR or the pre-init error otherwise.
STATUS ModelInfer::LoadAclModel(const Buffer &om_data) {
  MS_LOG(INFO) << "Start load model model.";
  // Load the model from the in-memory om buffer.
  uint32_t model_id;
  const auto load_ret = aclmdlLoadFromMem(om_data.Data(), om_data.DataSize(), &model_id);
  if (load_ret != ACL_ERROR_NONE) {
    MS_LOG(ERROR) << "Call aclmdlLoadFromMem failed, ret = " << load_ret;
    return lite::RET_ERROR;
  }
  // Hand the id to the model process and prepare its resources.
  model_process_.set_model_id(model_id);
  const int pre_init_ret = model_process_.PreInitModelResource();
  if (pre_init_ret != lite::RET_OK) {
    (void)aclmdlUnload(model_id);
    MS_LOG(ERROR) << "Pre init model resource failed.";
    return pre_init_ret;
  }
  MS_LOG(INFO) << "Load model model success.";
  return lite::RET_OK;
}
// Make sure the model is loaded and its context is current, then run the prediction.
// Returns lite::RET_ERROR if the preparation fails, otherwise the prediction status.
STATUS ModelInfer::Inference(const std::vector<KernelTensorPtr> &inputs, const std::vector<KernelTensorPtr> &outputs) {
  const bool ready = (Load() == lite::RET_OK);
  if (!ready) {
    MS_LOG(ERROR) << "Prepare model resource failed.";
    return lite::RET_ERROR;
  }
  return model_process_.PredictFromHost(inputs, outputs);
}
// Forward the dynamic batch sizes reported by the model process.
std::set<uint64_t> ModelInfer::GetDynamicBatch() { return model_process_.GetDynamicBatch(); }
// Returns the dynamic image sizes reported by the underlying ModelProcess.
// Needs to be called after the model has been loaded.
std::set<std::pair<uint64_t, uint64_t>> ModelInfer::GetDynamicImage() {
  auto image_sizes = model_process_.GetDynamicImage();
  return image_sizes;
}
} // namespace acl
} // namespace mindspore::kernel

View File

@ -0,0 +1,65 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_LITE_SRC_EXTENDRT_KERNEL_ASCEND_MODEL_MODEL_INFER_H_
#define MINDSPORE_LITE_SRC_EXTENDRT_KERNEL_ASCEND_MODEL_MODEL_INFER_H_
#include <vector>
#include <memory>
#include <set>
#include <utility>
#include <string>
#include "extendrt/kernel/ascend/model/model_process.h"
#include "extendrt/kernel/ascend/model/acl_env_guard.h"
#include "extendrt/kernel/ascend/options/acl_model_options.h"
#include "include/api/types.h"
#include "include/errorcode.h"
namespace mindspore::kernel {
namespace acl {
using mindspore::lite::STATUS;
// Drives an offline (om) model through ACL: Load() lazily loads the model and selects the
// ACL context, Inference() runs it through the owned ModelProcess.
class ModelInfer {
public:
// NOTE(review): presumably stores om_data and options in om_data_ / options_; the definition is
// not visible here — confirm.
ModelInfer(const Buffer &om_data, const AclModelOptionsPtr &options);
// NOTE(review): the destructor is defaulted, so releasing ACL resources appears to depend on an
// explicit Finalize() call — confirm with callers.
~ModelInfer() = default;
STATUS Init();
// Destroys the ACL context, resets the device and clears init_flag_ / load_flag_.
STATUS Finalize();
// Loads the model on first use and sets context_ as the current ACL context.
STATUS Load();
// Calls Load() and then forwards inputs/outputs to ModelProcess::PredictFromHost.
STATUS Inference(const std::vector<KernelTensorPtr> &inputs, const std::vector<KernelTensorPtr> &outputs);
// need to be called after model load
std::set<uint64_t> GetDynamicBatch();
// need to be called after model load
std::set<std::pair<uint64_t, uint64_t>> GetDynamicImage();
private:
// Loads om_data through aclmdlLoadFromMem and pre-initializes model_process_.
STATUS LoadAclModel(const Buffer &om_data);
// Cleared by Finalize(); NOTE(review): presumably set by Init() — confirm.
bool init_flag_;
// True once LoadAclModel has succeeded; cleared by Finalize().
bool load_flag_;
std::string device_type_;
// ACL context made current in Load() and destroyed in Finalize().
aclrtContext context_;
// Offline model data passed to LoadAclModel on the first Load().
Buffer om_data_;
// Model options; options_->device_id is the device that Finalize() resets.
AclModelOptionsPtr options_;
// Owns the per-model ACL resources and executes the model.
ModelProcess model_process_;
std::shared_ptr<AclEnvGuard> acl_env_;
};
using ModelInferPtr = std::shared_ptr<ModelInfer>;
} // namespace acl
} // namespace mindspore::kernel
#endif // MINDSPORE_LITE_SRC_EXTENDRT_KERNEL_ASCEND_MODEL_MODEL_INFER_H_

View File

@ -0,0 +1,642 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "extendrt/kernel/ascend/model/model_process.h"
#include <sys/time.h>
#include <utility>
#include <algorithm>
#include <map>
#include "common/log_adapter.h"
#include "src/common/utils.h"
#include "src/common/log_util.h"
namespace mindspore::kernel {
namespace acl {
namespace {
// Number of elements the trailing batch-size input tensor must hold (checked in SetBatchSize).
constexpr size_t kBatchSizeNum = 1;
// Number of elements the trailing image-size input tensor must hold; SetImageSize reads them as
// height followed by width.
constexpr size_t kImageSizeHwNum = 2;
} // namespace
// Translates an ACL data type into the matching TypeId.
// Types without an entry in the table yield TypeId::kNumberTypeEnd.
static TypeId TransToDataType(aclDataType data_type) {
  static const std::map<aclDataType, enum TypeId> kAclToTypeId = {
    {ACL_FLOAT16, TypeId::kNumberTypeFloat16}, {ACL_FLOAT, TypeId::kNumberTypeFloat32},
    {ACL_DOUBLE, TypeId::kNumberTypeFloat64},  {ACL_INT8, TypeId::kNumberTypeInt8},
    {ACL_INT16, TypeId::kNumberTypeInt16},     {ACL_INT32, TypeId::kNumberTypeInt32},
    {ACL_INT64, TypeId::kNumberTypeInt64},     {ACL_UINT8, TypeId::kNumberTypeUInt8},
    {ACL_UINT16, TypeId::kNumberTypeUInt16},   {ACL_UINT32, TypeId::kNumberTypeUInt32},
    {ACL_UINT64, TypeId::kNumberTypeUInt64},   {ACL_BOOL, TypeId::kNumberTypeBool},
  };
  const auto found = kAclToTypeId.find(data_type);
  return found == kAclToTypeId.end() ? TypeId::kNumberTypeEnd : found->second;
}
// Empties the container behind `vec`; a null pointer is silently ignored.
template <class T>
inline static void ClearIfNotNull(T *vec) {
  if (vec == nullptr) {
    return;
  }
  vec->clear();
}
// Appends a copy of `item` to the container behind `vec`; a null pointer is silently ignored.
template <class T, class U = std::vector<T>>
inline static void PushbackIfNotNull(U *vec, T &&item) {
  if (vec == nullptr) {
    return;
  }
  vec->emplace_back(item);
}
// Splits a list of AclTensorInfo into parallel vectors of names, shapes, data types and
// buffer sizes. Every output pointer is optional: a null pointer means the caller is not
// interested in that column. Non-null outputs are cleared first.
// Returns RET_ERROR if a requested output does not end up with one entry per tensor.
static STATUS ConstructTensorDesc(const std::vector<AclTensorInfo> &acl_tensor_list, std::vector<std::string> *names,
                                  std::vector<std::vector<int64_t>> *shapes, std::vector<enum TypeId> *data_types,
                                  std::vector<size_t> *mem_sizes) {
  ClearIfNotNull(names);
  ClearIfNotNull(shapes);
  ClearIfNotNull(data_types);
  ClearIfNotNull(mem_sizes);
  for (const auto &info : acl_tensor_list) {
    PushbackIfNotNull(names, info.name);
    PushbackIfNotNull(shapes, info.dims);
    PushbackIfNotNull(data_types, TransToDataType(info.data_type));
    PushbackIfNotNull(mem_sizes, info.buffer_size);
  }
  // Only outputs that were actually requested take part in the consistency check; the
  // helpers above accept null pointers, so the check must not dereference them either.
  const size_t expected = acl_tensor_list.size();
  const auto mismatched = [expected](const auto *vec) { return vec != nullptr && vec->size() != expected; };
  const auto size_of = [](const auto *vec) -> size_t { return vec == nullptr ? 0 : vec->size(); };
  if (mismatched(names) || mismatched(shapes) || mismatched(data_types) || mismatched(mem_sizes)) {
    MS_LOG(ERROR) << "Inner error, size do not match: names size " << size_of(names) << " shapes size "
                  << size_of(shapes) << " data types size " << size_of(data_types) << " mem sizes size "
                  << size_of(mem_sizes) << " acl_tensor_list size " << expected;
    return lite::RET_ERROR;
  }
  return lite::RET_OK;
}
// Formats a shape as "[d0, d1, ...]"; an empty shape becomes "[]".
static std::string ShapeToString(const std::vector<int64_t> &shape) {
  std::string text{"["};
  bool is_first = true;
  for (const int64_t dim : shape) {
    if (!is_first) {
      text.append(", ");
    }
    text.append(std::to_string(dim));
    is_first = false;
  }
  text.push_back(']');
  return text;
}
// Creates the model description for model_id_ and builds the input/output buffer
// bookkeeping from it.
// On any failure everything created so far (description, input memory, output dataset and
// memory) is released again, so a failed call leaves no partially initialized state behind
// and a later retry starts from scratch.
// Returns RET_OK on success, otherwise the failing step's status (RET_ERROR for ACL failures).
STATUS ModelProcess::PreInitModelResource() {
  // Undo whatever part of the initialization already happened.
  const auto release_partial = [this]() {
    if (model_desc_ != nullptr) {
      (void)aclmdlDestroyDesc(model_desc_);
      model_desc_ = nullptr;
    }
    DestroyInputsBuffer();
    DestroyOutputsBuffer();
  };
  model_desc_ = aclmdlCreateDesc();
  if (model_desc_ == nullptr) {
    MS_LOG(ERROR) << "Create model desc failed.";
    return lite::RET_ERROR;
  }
  aclError acl_ret = aclmdlGetDesc(model_desc_, model_id_);
  if (acl_ret != ACL_ERROR_NONE) {
    MS_LOG(ERROR) << "Read model desc failed, ret = " << acl_ret;
    release_partial();
    return lite::RET_ERROR;
  }
  STATUS ret = InitInputsBuffer();
  if (ret != lite::RET_OK) {
    MS_LOG(ERROR) << "Create input buffer failed.";
    release_partial();
    return ret;
  }
  ret = InitOutputsBuffer();
  if (ret != lite::RET_OK) {
    MS_LOG(ERROR) << "Create output buffer failed.";
    release_partial();
    return ret;
  }
  return lite::RET_OK;
}
std::set<uint64_t> ModelProcess::GetDynamicBatch() {
if (model_desc_ == nullptr) {
MS_LOG(ERROR) << " Model desc is nullptr.";
return std::set<uint64_t>();
}
aclmdlBatch dynamic_batch;
if (aclmdlGetDynamicBatch(model_desc_, &dynamic_batch) != ACL_SUCCESS) {
MS_LOG(ERROR) << "Failed to get dynamic batch.";
return std::set<uint64_t>();
}
size_t batch_count = dynamic_batch.batchCount;
if (batch_count > ACL_MAX_BATCH_NUM) {
MS_LOG(ERROR) << "Real batch count " << batch_count << " is larger than max " << ACL_MAX_BATCH_NUM;
return std::set<uint64_t>();
}
std::set<uint64_t> batch;
for (size_t i = 0; i < dynamic_batch.batchCount; ++i) {
batch.insert(dynamic_batch.batch[i]);
}
return batch;
}
std::set<std::pair<uint64_t, uint64_t>> ModelProcess::GetDynamicImage() {
if (model_desc_ == nullptr) {
MS_LOG(ERROR) << " Model desc is nullptr.";
return std::set<std::pair<uint64_t, uint64_t>>();
}
aclmdlHW dynamic_hw;
if (aclmdlGetDynamicHW(model_desc_, 0, &dynamic_hw) != ACL_SUCCESS) {
MS_LOG(ERROR) << "Failed to get dynamic hw.";
return std::set<std::pair<uint64_t, uint64_t>>();
}
size_t hw_count = dynamic_hw.hwCount;
if (hw_count > ACL_MAX_HW_NUM) {
MS_LOG(ERROR) << "Real hw count " << hw_count << " is larger than max " << ACL_MAX_HW_NUM;
return std::set<std::pair<uint64_t, uint64_t>>();
}
std::set<std::pair<uint64_t, uint64_t>> image;
for (size_t i = 0; i < dynamic_hw.hwCount; ++i) {
image.insert(std::pair<uint64_t, uint64_t>(dynamic_hw.hw[i][0], dynamic_hw.hw[i][1]));
}
return image;
}
// Records one AclTensorInfo per model input in input_infos_ (buffer size, data type, shape,
// name). Unless is_run_on_device_ is set, a buffer of the input's size is also allocated
// with aclrtMalloc and stored in the info; otherwise the stored pointers stay null.
// Returns RET_ERROR as soon as an allocation or a dims query fails.
STATUS ModelProcess::InitInputsBuffer() {
  const size_t input_count = aclmdlGetNumInputs(model_desc_);
  MS_LOG(INFO) << "input_size = " << input_count;
  for (size_t idx = 0; idx < input_count; ++idx) {
    auto byte_size = aclmdlGetInputSizeByIndex(model_desc_, idx);
    void *device_mem = nullptr;
    if (!is_run_on_device_) {  // need to copy input/output to/from device
      const aclError malloc_status = aclrtMalloc(&device_mem, byte_size, ACL_MEM_MALLOC_NORMAL_ONLY);
      if (malloc_status != ACL_ERROR_NONE) {
        MS_LOG(ERROR) << "Malloc device input buffer failed , input size " << byte_size;
        return lite::RET_ERROR;
      }
    }
    aclmdlIODims io_dims;
    const aclError dims_status = aclmdlGetInputDims(model_desc_, idx, &io_dims);
    if (dims_status != ACL_ERROR_NONE) {
      MS_LOG(ERROR) << "Get input shape failed, ret = " << dims_status;
      // The buffer of this input is not tracked in input_infos_ yet, so free it here.
      if (!is_run_on_device_) {
        aclrtFree(device_mem);
      }
      return lite::RET_ERROR;
    }
    aclDataType elem_type = aclmdlGetInputDataType(model_desc_, idx);
    std::vector<int64_t> input_shape(io_dims.dims, io_dims.dims + io_dims.dimCount);
    std::string tensor_name = aclmdlGetInputNameByIndex(model_desc_, idx);
    if (tensor_name.empty()) {
      MS_LOG(WARNING) << "Get name of input " << idx << " failed.";
    }
    MS_LOG(INFO) << "Name of input " << idx << " is " << tensor_name;
    input_infos_.emplace_back(AclTensorInfo{device_mem, device_mem, byte_size, elem_type, input_shape, tensor_name});
  }
  MS_LOG(INFO) << "Create model inputs success";
  return lite::RET_OK;
}
// Allocates buffer_size bytes (aclrtMallocHost when is_run_on_device_ is set, aclrtMalloc
// otherwise), wraps the memory in an ACL data buffer and appends it to `dataset`.
// The allocated address is written to *data_mem_buffer. If wrapping or appending fails the
// memory is released again before RET_ERROR is returned.
STATUS ModelProcess::CreateDataBuffer(void **data_mem_buffer, size_t buffer_size, aclmdlDataset *dataset) {
  if (data_mem_buffer == nullptr) {
    MS_LOG(ERROR) << "Data mem buffer is nullptr.";
    return lite::RET_ERROR;
  }
  // Releases the memory with the function matching the allocator chosen below.
  const auto release_mem = [this, data_mem_buffer]() {
    if (is_run_on_device_) {
      (void)aclrtFreeHost(*data_mem_buffer);
    } else {
      (void)aclrtFree(*data_mem_buffer);
    }
  };
  if (is_run_on_device_) {
    const aclError host_status = aclrtMallocHost(data_mem_buffer, buffer_size);
    if (host_status != ACL_ERROR_NONE) {
      MS_LOG(ERROR) << "Malloc host buffer failed , buffer size " << buffer_size;
      return lite::RET_ERROR;
    }
  } else {
    const aclError device_status = aclrtMalloc(data_mem_buffer, buffer_size, ACL_MEM_MALLOC_NORMAL_ONLY);
    if (device_status != ACL_ERROR_NONE) {
      MS_LOG(ERROR) << "Malloc device buffer failed , buffer size " << buffer_size;
      return lite::RET_ERROR;
    }
  }
  auto wrapped = aclCreateDataBuffer(*data_mem_buffer, buffer_size);
  if (wrapped == nullptr) {
    MS_LOG(ERROR) << "Create Data Buffer failed";
    release_mem();
    return lite::RET_ERROR;
  }
  const aclError add_status = aclmdlAddDatasetBuffer(dataset, wrapped);
  if (add_status != ACL_ERROR_NONE) {
    MS_LOG(ERROR) << "add data buffer failed";
    release_mem();
    aclDestroyDataBuffer(wrapped);
    return lite::RET_ERROR;
  }
  return lite::RET_OK;
}
// Creates the output dataset (outputs_) and, for every model output, allocates a buffer via
// CreateDataBuffer and records its size, data type, shape and name in output_infos_.
// Returns RET_ERROR when the dataset cannot be created, a buffer cannot be added, or the
// output dims cannot be queried.
STATUS ModelProcess::InitOutputsBuffer() {
  aclError ret;
  outputs_ = aclmdlCreateDataset();
  if (outputs_ == nullptr) {
    MS_LOG(ERROR) << "Create output dataset failed";
    return lite::RET_ERROR;
  }
  size_t output_size = aclmdlGetNumOutputs(model_desc_);
  MS_LOG(INFO) << "Output_size = " << output_size;
  for (size_t i = 0; i < output_size; ++i) {
    auto buffer_size = aclmdlGetOutputSizeByIndex(model_desc_, i);
    void *data_mem_buffer = nullptr;
    if (CreateDataBuffer(&data_mem_buffer, buffer_size, outputs_) != lite::RET_OK) {
      MS_LOG(ERROR) << "Add output data buffer failed, buffer size " << buffer_size;
      return lite::RET_ERROR;
    }
    aclmdlIODims dims;
    ret = aclmdlGetOutputDims(model_desc_, i, &dims);
    if (ret != ACL_ERROR_NONE) {
      MS_LOG(ERROR) << "Get output shape failed";
      // This buffer is not tracked in output_infos_ yet, so release it here.
      if (!is_run_on_device_) {
        aclrtFree(data_mem_buffer);
      } else {
        aclrtFreeHost(data_mem_buffer);
      }
      // Must be reported as a failure: the dataset still references the memory freed above,
      // so continuing as if initialization succeeded would execute on freed memory.
      return lite::RET_ERROR;
    }
    aclFormat format = aclmdlGetOutputFormat(model_desc_, i);
    MS_LOG(DEBUG) << "The output format of om is " << format;
    aclDataType data_type = aclmdlGetOutputDataType(model_desc_, i);
    std::vector<int64_t> shape(dims.dims, dims.dims + dims.dimCount);
    std::string output_name = aclmdlGetOutputNameByIndex(model_desc_, i);
    if (output_name.empty()) {
      MS_LOG(WARNING) << "Get name of output " << i << " failed.";
    }
    MS_LOG(INFO) << "Name of om output " << i << " is " << output_name << "Buffer size " << buffer_size;
    output_infos_.emplace_back(
      AclTensorInfo{data_mem_buffer, data_mem_buffer, buffer_size, data_type, shape, output_name});
  }
  MS_LOG(INFO) << "Create model output success.";
  return lite::RET_OK;
}
// Destroys every data buffer object held by the input dataset, then the dataset itself, and
// resets inputs_ to nullptr. Does nothing when no dataset exists.
void ModelProcess::DestroyInputsDataset() {
  if (inputs_ != nullptr) {
    for (size_t idx = 0; idx < aclmdlGetDatasetNumBuffers(inputs_); ++idx) {
      aclDestroyDataBuffer(aclmdlGetDatasetBuffer(inputs_, idx));
    }
    aclmdlDestroyDataset(inputs_);
    inputs_ = nullptr;
  }
}
// Frees the memory recorded in input_infos_ (only when is_run_on_device_ is false, the case
// in which InitInputsBuffer allocated it) and drops all input infos.
void ModelProcess::DestroyInputsDataMem() {
  const bool owns_device_mem = !is_run_on_device_;
  if (owns_device_mem) {
    for (const auto &info : input_infos_) {
      aclrtFree(info.device_data);
    }
  }
  input_infos_.clear();
}
// Releases all input-side resources: first the input memory tracked in input_infos_, then
// the input dataset and its data buffer objects.
void ModelProcess::DestroyInputsBuffer() {
DestroyInputsDataMem();
DestroyInputsDataset();
}
// Frees the memory recorded in output_infos_ (aclrtFreeHost when is_run_on_device_ is set,
// aclrtFree otherwise), drops the output infos and, if an output dataset exists, destroys
// its data buffer objects and the dataset itself.
void ModelProcess::DestroyOutputsBuffer() {
  for (const auto &info : output_infos_) {
    if (is_run_on_device_) {
      aclrtFreeHost(info.device_data);
    } else {
      aclrtFree(info.device_data);
    }
  }
  output_infos_.clear();
  if (outputs_ != nullptr) {
    for (size_t idx = 0; idx < aclmdlGetDatasetNumBuffers(outputs_); ++idx) {
      aclDestroyDataBuffer(aclmdlGetDatasetBuffer(outputs_, idx));
    }
    aclmdlDestroyDataset(outputs_);
    outputs_ = nullptr;
  }
}
// Unloads the model identified by model_id_, destroys the model description (if any) and
// releases the input and output buffers.
// Returns RET_ERROR, without continuing the cleanup, as soon as unloading the model or
// destroying the description fails.
STATUS ModelProcess::UnLoad() {
  const auto unload_status = aclmdlUnload(model_id_);
  if (unload_status != ACL_ERROR_NONE) {
    MS_LOG(ERROR) << "Unload model failed, ret = " << unload_status;
    return lite::RET_ERROR;
  }
  if (model_desc_ != nullptr) {
    const auto desc_status = aclmdlDestroyDesc(model_desc_);
    if (desc_status != ACL_ERROR_NONE) {
      MS_LOG(ERROR) << "Unload model failed, ret = " << desc_status;
      return lite::RET_ERROR;
    }
    model_desc_ = nullptr;
  }
  DestroyInputsBuffer();
  DestroyOutputsBuffer();
  MS_LOG(INFO) << "End unload model " << model_id_;
  return lite::RET_OK;
}
// Applies a dynamic batch size for the next execution.
// The last tensor in `inputs` carries the batch size: it must hold exactly kBatchSizeNum
// elements and its first element is read as an int32 value. The recorded buffer size of
// every input is refreshed from the given tensors before the batch size is forwarded to
// aclmdlSetDynamicBatchSize.
// Returns RET_ERROR for an empty input list, for more inputs than the model declares, for a
// malformed batch-size tensor, or when an ACL call fails.
STATUS ModelProcess::SetBatchSize(const std::vector<KernelTensorPtr> &inputs) {
  // Guard the `inputs.size() - 1` index below against an empty list.
  if (inputs.empty()) {
    MS_LOG(ERROR) << "Inputs is empty, can not get batch size tensor.";
    return lite::RET_ERROR;
  }
  // Guard the writes to input_infos_[i] below against running past its end.
  if (inputs.size() > input_infos_.size()) {
    MS_LOG(ERROR) << "Input count " << inputs.size() << " is larger than model input count " << input_infos_.size();
    return lite::RET_ERROR;
  }
  for (size_t i = 0; i < inputs.size(); i++) {
    input_infos_[i].buffer_size = inputs[i]->GetData()->size;
  }
  const auto &batch_size_tensor = inputs.back();
  size_t data_type_size = lite::DataTypeSize(batch_size_tensor->GetDtype());
  size_t num = 0;
  if (data_type_size != 0) {
    num = batch_size_tensor->GetData()->size / data_type_size;
  }
  if (num != kBatchSizeNum) {
    MS_LOG(ERROR) << "Batch size num should be " << kBatchSizeNum;
    return lite::RET_ERROR;
  }
  auto *ptr = reinterpret_cast<const int32_t *>(batch_size_tensor->GetData()->addr);
  CHECK_NULL_RETURN(ptr);
  auto batch_size = ptr[0];
  aclError ret;
  size_t index;
  // The dynamic batch value is attached to the model input named ACL_DYNAMIC_TENSOR_NAME.
  ret = aclmdlGetInputIndexByName(model_desc_, ACL_DYNAMIC_TENSOR_NAME, &index);
  if (ret != ACL_ERROR_NONE) {
    MS_LOG(ERROR) << "Get index failed";
    return lite::RET_ERROR;
  }
  MS_LOG(INFO) << "Set Batch size(" << batch_size << ") of input " << index << ".";
  ret = aclmdlSetDynamicBatchSize(model_id_, inputs_, index, batch_size);
  if (ret != ACL_ERROR_NONE) {
    MS_LOG(ERROR) << "Set dynamic batch size failed, model_id is " << model_id_;
    return lite::RET_ERROR;
  }
  return lite::RET_OK;
}
// Applies a dynamic image size for the next execution.
// The last tensor in `inputs` carries the image size: it must hold exactly kImageSizeHwNum
// elements, read as int32 height followed by width. The recorded buffer size of every input
// is refreshed from the given tensors before the size is forwarded to aclmdlSetDynamicHWSize.
// Returns RET_ERROR for an empty input list, for more inputs than the model declares, for a
// malformed image-size tensor, or when an ACL call fails.
STATUS ModelProcess::SetImageSize(const std::vector<KernelTensorPtr> &inputs) {
  // Guard the `inputs.size() - 1` index below against an empty list.
  if (inputs.empty()) {
    MS_LOG(ERROR) << "Inputs is empty, can not get image size tensor.";
    return lite::RET_ERROR;
  }
  // Guard the writes to input_infos_[i] below against running past its end.
  if (inputs.size() > input_infos_.size()) {
    MS_LOG(ERROR) << "Input count " << inputs.size() << " is larger than model input count " << input_infos_.size();
    return lite::RET_ERROR;
  }
  for (size_t i = 0; i < inputs.size(); i++) {
    input_infos_[i].buffer_size = inputs[i]->GetData()->size;
  }
  const auto &image_size_tensor = inputs.back();
  size_t data_type_size = lite::DataTypeSize(image_size_tensor->GetDtype());
  size_t num = 0;
  if (data_type_size != 0) {
    num = image_size_tensor->GetData()->size / data_type_size;
  }
  if (num != kImageSizeHwNum) {
    MS_LOG(ERROR) << "Image size hw num should be " << kImageSizeHwNum;
    return lite::RET_ERROR;
  }
  auto *hw = reinterpret_cast<const int32_t *>(image_size_tensor->GetData()->addr);
  CHECK_NULL_RETURN(hw);
  int32_t height = hw[0];
  int32_t width = hw[1];
  size_t index;
  aclError ret = ACL_ERROR_NONE;
  // The dynamic size is attached to the model input named ACL_DYNAMIC_TENSOR_NAME.
  ret = aclmdlGetInputIndexByName(model_desc_, ACL_DYNAMIC_TENSOR_NAME, &index);
  if (ret != ACL_ERROR_NONE) {
    MS_LOG(ERROR) << "Get index failed";
    return lite::RET_ERROR;
  }
  MS_LOG(INFO) << "Set Image size(" << height << "," << width << ") of input " << index << ".";
  ret = aclmdlSetDynamicHWSize(model_id_, inputs_, index, height, width);
  if (ret != ACL_ERROR_NONE) {
    MS_LOG(ERROR) << "Set dynamic image size failed, model_id is " << model_id_;
    return lite::RET_ERROR;
  }
  return lite::RET_OK;
}
// Validates the given tensors against the recorded tensor infos.
// For dynamic-shape models nothing is checked. Otherwise a shape mismatch only produces a
// warning, while a data type or data size mismatch (or too few tensors) is an error.
STATUS ModelProcess::CheckTensorByTensorInfo(const std::vector<KernelTensorPtr> &tensor,
                                             const std::vector<AclTensorInfo> &tensor_info) {
  if (IsDynamicShape()) {
    return lite::RET_OK;
  }
  // The loop indexes `tensor` by the info index, so there must be a tensor for every info.
  if (tensor.size() < tensor_info.size()) {
    MS_LOG(ERROR) << "Input count not match, required count " << tensor_info.size() << ", given count "
                  << tensor.size();
    return lite::RET_ERROR;
  }
  for (size_t i = 0; i < tensor_info.size(); ++i) {
    if (tensor[i]->GetShapeVector() != tensor_info[i].dims) {
      MS_LOG(WARNING) << "Note: input " << i << " shape not match, required " << ShapeToString(tensor_info[i].dims)
                      << ", given " << ShapeToString(tensor[i]->GetShapeVector()) << "."
                      << "Please check input shape has been modified by DVPP method.";
    }
    if (tensor[i]->GetDtype() != TransToDataType(tensor_info[i].data_type)) {
      MS_LOG(ERROR) << "Note: input " << i << " data type not match, required "
                    << static_cast<int>(TransToDataType(tensor_info[i].data_type)) << ", given "
                    << static_cast<int>(tensor[i]->GetDtype());
      return lite::RET_ERROR;
    }
    if (tensor[i]->GetData()->size != tensor_info[i].buffer_size) {
      MS_LOG(ERROR) << "Input " << i << " data size not match, required size " << tensor_info[i].buffer_size
                    << ", given count " << tensor[i]->GetData()->size;
      return lite::RET_ERROR;
    }
  }
  return lite::RET_OK;
}
// For dynamic-shape models: applies the dynamic batch size and/or image size taken from
// `inputs` and then refreshes the recorded output shapes and sizes.
// Static-shape models return RET_OK immediately.
STATUS ModelProcess::ProcDynamicShape(const std::vector<KernelTensorPtr> &inputs) {
  const bool is_dynamic = IsDynamicShape();
  if (!is_dynamic) {
    MS_LOG(DEBUG) << "Input is not dynamic shape";
    return lite::RET_OK;
  }
  if (IsDynamicBatchSize() && SetBatchSize(inputs) != lite::RET_OK) {
    MS_LOG(ERROR) << "Set dynamic batch size failed.";
    return lite::RET_ERROR;
  }
  if (IsDynamicImageSize() && SetImageSize(inputs) != lite::RET_OK) {
    MS_LOG(ERROR) << "Set dynamic image size failed.";
    return lite::RET_ERROR;
  }
  const STATUS reset_status = ResetOutputSize();
  if (reset_status == lite::RET_OK) {
    return lite::RET_OK;
  }
  MS_LOG(ERROR) << "Reset output size failed";
  return lite::RET_ERROR;
}
// True when the model declares dynamic batch sizes or dynamic image sizes.
bool ModelProcess::IsDynamicShape() {
  if (IsDynamicBatchSize()) {
    return true;
  }
  return IsDynamicImageSize();
}
// True when GetDynamicBatch() reports at least one batch size.
bool ModelProcess::IsDynamicBatchSize() {
  const auto batch_sizes = GetDynamicBatch();
  return !batch_sizes.empty();
}
// True when GetDynamicImage() reports at least one image size.
bool ModelProcess::IsDynamicImageSize() {
  const auto image_sizes = GetDynamicImage();
  return !image_sizes.empty();
}
// Validates `inputs` and builds the input dataset (inputs_) for one execution.
// When is_run_on_device_ is false each input is first copied into its pre-allocated buffer
// with a host-to-device aclrtMemcpy; otherwise the caller's data address is used directly.
// Finally the dynamic-shape settings, if any, are applied.
// On failure the partially built dataset is left in inputs_; the visible caller
// (PredictFromHost) destroys it through DestroyInputsDataset.
STATUS ModelProcess::CheckAndInitInput(const std::vector<KernelTensorPtr> &inputs) {
  // The copy loop indexes `inputs` by the model input index, and the tensor check below is
  // skipped entirely for dynamic-shape models, so the count has to be verified here.
  if (inputs.size() < input_infos_.size()) {
    MS_LOG(ERROR) << "Input count not match, required count " << input_infos_.size() << ", given count "
                  << inputs.size();
    return lite::RET_ERROR;
  }
  aclError ret;
  inputs_ = aclmdlCreateDataset();
  if (inputs_ == nullptr) {
    MS_LOG(ERROR) << "Create input dataset failed";
    return lite::RET_ERROR;
  }
  // check inputs
  if (CheckTensorByTensorInfo(inputs, input_infos_) != lite::RET_OK) {
    MS_LOG(ERROR) << "Check input tensor failed.";
    return lite::RET_ERROR;
  }
  // copy inputs
  for (size_t i = 0; i < input_infos_.size(); ++i) {
    auto &info = input_infos_[i];
    const auto &input = inputs[i];
    void *data = input->GetData()->addr;
    void *input_buffer = nullptr;
    if (!is_run_on_device_) {
      info.cur_device_data = info.device_data;
      ret =
        aclrtMemcpy(info.cur_device_data, info.buffer_size, data, input->GetData()->size, ACL_MEMCPY_HOST_TO_DEVICE);
      if (ret != ACL_ERROR_NONE) {
        MS_LOG(ERROR) << "Acl memcpy input " << i
                      << " data to device failed, src input size: " << input->GetData()->size
                      << ", dst device buffer size: " << info.buffer_size;
        return lite::RET_ERROR;
      }
      input_buffer = info.cur_device_data;
    } else {
      input_buffer = data;
    }
    auto data_buffer = aclCreateDataBuffer(input_buffer, info.buffer_size);
    if (data_buffer == nullptr) {
      MS_LOG(ERROR) << "Create Data Buffer failed";
      return lite::RET_ERROR;
    }
    ret = aclmdlAddDatasetBuffer(inputs_, data_buffer);
    if (ret != ACL_ERROR_NONE) {
      MS_LOG(ERROR) << "Add data buffer failed";
      // The dataset did not take the buffer object, so destroy it here.
      aclDestroyDataBuffer(data_buffer);
      return lite::RET_ERROR;
    }
  }
  if (ProcDynamicShape(inputs) != lite::RET_OK) {
    MS_LOG(ERROR) << "Proc input dynamic shape failed.";
    return lite::RET_ERROR;
  }
  return lite::RET_OK;
}
// Re-reads the current dims of every model output and updates the matching entry of
// output_infos_: `dims` becomes the current shape and `buffer_size` the product of all dims
// multiplied by the size of the output's data type.
// Returns RET_ERROR if the current dims of an output cannot be queried.
STATUS ModelProcess::ResetOutputSize() {
  const size_t output_count = aclmdlGetNumOutputs(model_desc_);
  for (size_t out_idx = 0; out_idx < output_count; ++out_idx) {
    struct aclmdlIODims cur_dims;
    const aclError dims_status = aclmdlGetCurOutputDims(model_desc_, out_idx, &cur_dims);
    if (dims_status != ACL_ERROR_NONE) {
      MS_LOG(ERROR) << "get output dim error.";
      return lite::RET_ERROR;
    }
    std::vector<int64_t> cur_shape(cur_dims.dims, cur_dims.dims + cur_dims.dimCount);
    size_t element_count = 1;
    for (size_t axis = 0; axis < cur_dims.dimCount; ++axis) {
      element_count *= cur_dims.dims[axis];
    }
    const aclDataType elem_type = aclmdlGetOutputDataType(model_desc_, out_idx);
    auto &info = output_infos_[out_idx];
    info.dims = cur_shape;
    info.buffer_size = element_count * aclDataTypeSize(elem_type);
  }
  return lite::RET_OK;
}
// Executes the model once: prepares the input dataset from `inputs`, runs aclmdlExecute,
// destroys the input dataset again and copies the results into `outputs`.
// When the environment variable GLOG_v starts with '1' the execution time is measured and
// logged in microseconds.
STATUS ModelProcess::PredictFromHost(const std::vector<KernelTensorPtr> &inputs,
                                     const std::vector<KernelTensorPtr> &outputs) {
  const STATUS prepare_status = CheckAndInitInput(inputs);
  if (prepare_status != lite::RET_OK) {
    MS_LOG(ERROR) << "Check or init input failed";
    DestroyInputsDataset();
    return prepare_status;  // forward status error
  }
  const auto verbosity = std::getenv("GLOG_v");
  const bool measure_time = (verbosity != nullptr) && (verbosity[0] == '1');
  aclError exec_status;
  if (!measure_time) {
    exec_status = aclmdlExecute(model_id_, inputs_, outputs_);
  } else {
    // Converts a timeval into a single microsecond count.
    const auto to_usec = [](const struct timeval &tv) {
      constexpr uint64_t kUSecondInSecond = 1000000;
      return kUSecondInSecond * static_cast<uint64_t>(tv.tv_sec) + static_cast<uint64_t>(tv.tv_usec);
    };
    struct timeval begin_time;
    struct timeval finish_time;
    (void)gettimeofday(&begin_time, nullptr);
    exec_status = aclmdlExecute(model_id_, inputs_, outputs_);
    (void)gettimeofday(&finish_time, nullptr);
    const uint64_t cost = to_usec(finish_time) - to_usec(begin_time);
    MS_LOG(INFO) << "Model execute in " << cost << " us";
  }
  // The input dataset is rebuilt for every call, so drop it regardless of the outcome.
  DestroyInputsDataset();
  if (exec_status != ACL_ERROR_NONE) {
    MS_LOG(ERROR) << "Execute Model Failed, ret = " << exec_status;
    return lite::RET_ERROR;
  }
  const STATUS output_status = GetOutputs(outputs);
  if (output_status != lite::RET_OK) {
    MS_LOG(ERROR) << "Build outputs failed";
    return output_status;
  }
  MS_LOG(INFO) << "Execute model success";
  return lite::RET_OK;
}
// Copies the execution results into `outputs` via ConstructTensor.
// An empty output list or a ConstructTensor failure is reported as RET_ERROR.
STATUS ModelProcess::GetOutputs(const std::vector<KernelTensorPtr> &outputs) {
  if (outputs.empty()) {
    MS_LOG(ERROR) << "Ms tensor outputs is empty.";
    return lite::RET_ERROR;
  }
  const STATUS build_status = ConstructTensor(outputs);
  if (build_status == lite::RET_OK) {
    return lite::RET_OK;
  }
  MS_LOG(ERROR) << "Construct ms tensor failed.";
  return lite::RET_ERROR;
}
// Copies every model output buffer into the matching tensor of `outputs`.
// The number of tensors and the data size of each tensor must match the recorded output
// infos; otherwise RET_ERROR is returned before anything is copied.
// The copy kind is host-to-host when is_run_on_device_ is set and device-to-host otherwise.
STATUS ModelProcess::ConstructTensor(const std::vector<KernelTensorPtr> &outputs) {
  if (outputs.size() != output_infos_.size()) {
    MS_LOG(ERROR) << "Actual tensor count not match, required count " << output_infos_.size() << ", given count "
                  << outputs.size();
    return lite::RET_ERROR;
  }
  std::vector<std::string> names;
  std::vector<std::vector<int64_t>> shapes;
  std::vector<enum TypeId> data_types;
  std::vector<size_t> mem_sizes;
  if (ConstructTensorDesc(output_infos_, &names, &shapes, &data_types, &mem_sizes) != lite::RET_OK) {
    MS_LOG(ERROR) << "Construct tensor desc failed.";
    return lite::RET_ERROR;
  }
  // Verify all sizes up front so that no output is modified when any of them mismatches.
  for (size_t i = 0; i < output_infos_.size(); ++i) {
    if (outputs[i]->GetData()->size != mem_sizes[i]) {
      MS_LOG(ERROR) << "Ms tensor size " << outputs[i]->GetData()->size << " not match model tensor size "
                    << mem_sizes[i];
      return lite::RET_ERROR;
    }
  }
  aclrtMemcpyKind kind = is_run_on_device_ ? ACL_MEMCPY_HOST_TO_HOST : ACL_MEMCPY_DEVICE_TO_HOST;
  for (size_t i = 0; i < output_infos_.size(); ++i) {
    if (output_infos_[i].cur_device_data == nullptr) {
      // when run on device, cur_device_data is nullptr before first execute
      continue;
    }
    auto ret = aclrtMemcpy(outputs[i]->GetData()->addr, outputs[i]->GetData()->size, output_infos_[i].cur_device_data,
                           output_infos_[i].buffer_size, kind);
    if (ret != ACL_ERROR_NONE) {
      // This copies model outputs, so the message names the output index.
      MS_LOG(ERROR) << "Memcpy output " << i << " from " << (is_run_on_device_ ? "host" : "device")
                    << " to host failed, memory size " << output_infos_[i].buffer_size;
      return lite::RET_ERROR;
    }
  }
  return lite::RET_OK;
}
} // namespace acl
} // namespace mindspore::kernel

View File

@ -0,0 +1,104 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_LITE_SRC_EXTENDRT_KERNEL_ASCEND_MODEL_MODEL_PROCESS_H_
#define MINDSPORE_LITE_SRC_EXTENDRT_KERNEL_ASCEND_MODEL_MODEL_PROCESS_H_
#include <vector>
#include <string>
#include <map>
#include <set>
#include <utility>
#include "acl/acl.h"
#include "acl/acl_mdl.h"
#include "acl/acl_rt.h"
#include "include/api/types.h"
#include "include/errorcode.h"
#include "kernel/kernel.h"
#include "extendrt/kernel/ascend/options/acl_model_options.h"
namespace mindspore::kernel {
namespace acl {
using mindspore::lite::STATUS;
// Bookkeeping for one model input or output. Instances are created by positional aggregate
// initialization, so the member order must stay as it is.
struct AclTensorInfo {
// Buffer used for the current execution: set to device_data before an input copy and used
// as the copy source when outputs are read back.
void *cur_device_data;
// Buffer allocated when the model resources are initialized and freed when they are
// destroyed; stays nullptr for inputs when no buffer is allocated.
void *device_data;
// Size of the buffer in bytes; refreshed for dynamic-shape models.
size_t buffer_size;
// Element type as reported by the model description.
aclDataType data_type;
// Tensor shape as reported by the model description.
std::vector<int64_t> dims;
// Tensor name as reported by the model description; may be empty.
std::string name;
};
// Owns the ACL resources of one loaded model (description, input/output datasets and
// buffers) and executes it.
class ModelProcess {
public:
// Starts with model id 0xffffffff, no ACL resources and is_run_on_device_ == false.
explicit ModelProcess(const AclModelOptionsPtr &options)
: options_(options),
model_id_(0xffffffff),
is_run_on_device_(false),
model_desc_(nullptr),
inputs_(nullptr),
outputs_(nullptr),
input_infos_(),
output_infos_() {}
// NOTE(review): the destructor releases nothing; cleanup appears to rely on UnLoad() — confirm.
~ModelProcess() {}
// Unloads the model and releases the description and all input/output buffers.
STATUS UnLoad();
// Runs one inference: copies inputs in, executes the model and copies outputs out.
STATUS PredictFromHost(const std::vector<KernelTensorPtr> &inputs, const std::vector<KernelTensorPtr> &outputs);
// Creates the model description and the input/output buffer bookkeeping for model_id_.
STATUS PreInitModelResource();
// override this method to avoid request/reply data copy
void SetIsDevice(bool is_device) { is_run_on_device_ = is_device; }
void set_model_id(uint32_t model_id) { model_id_ = model_id; }
uint32_t model_id() const { return model_id_; }
// Dynamic batch sizes declared by the model; empty if none or on error.
std::set<uint64_t> GetDynamicBatch();
// Dynamic image sizes declared by the model; empty if none or on error.
std::set<std::pair<uint64_t, uint64_t>> GetDynamicImage();
private:
// Allocates a buffer, wraps it in an ACL data buffer and appends it to `dataset`.
STATUS CreateDataBuffer(void **data_mem_buffer, size_t buffer_size, aclmdlDataset *dataset);
// Validates the inputs and builds the input dataset for one execution.
STATUS CheckAndInitInput(const std::vector<KernelTensorPtr> &inputs);
// Compares tensors against the recorded infos (skipped for dynamic-shape models).
STATUS CheckTensorByTensorInfo(const std::vector<KernelTensorPtr> &tensor,
const std::vector<AclTensorInfo> &tensor_info);
STATUS GetOutputs(const std::vector<KernelTensorPtr> &outputs);
// Copies the model output buffers into the given tensors.
STATUS ConstructTensor(const std::vector<KernelTensorPtr> &outputs);
// Read the dynamic batch / image size from the last input tensor and pass it to ACL.
STATUS SetBatchSize(const std::vector<KernelTensorPtr> &inputs);
STATUS SetImageSize(const std::vector<KernelTensorPtr> &inputs);
STATUS InitInputsBuffer();
STATUS InitOutputsBuffer();
// Refreshes the output shapes and buffer sizes from the current output dims.
STATUS ResetOutputSize();
STATUS ProcDynamicShape(const std::vector<KernelTensorPtr> &inputs);
// NOTE(review): no definition of this method was seen in the reviewed part of the .cc file,
// which uses a file-local ShapeToString instead — confirm whether the declaration is needed.
std::string VectorToString(const std::vector<int64_t> &);
bool IsDynamicShape();
bool IsDynamicBatchSize();
bool IsDynamicImageSize();
void DestroyInputsDataset();
void DestroyInputsDataMem();
void DestroyInputsBuffer();
void DestroyOutputsBuffer();
AclModelOptionsPtr options_;
uint32_t model_id_;
// if run on device(AICPU), there is no need to alloc device memory and copy inputs to(/outputs from) device
bool is_run_on_device_;
aclmdlDesc *model_desc_;
// Input dataset; created for every execution and destroyed afterwards.
aclmdlDataset *inputs_;
// Output dataset; created once when the model resources are initialized.
aclmdlDataset *outputs_;
std::vector<AclTensorInfo> input_infos_;
std::vector<AclTensorInfo> output_infos_;
};
} // namespace acl
} // namespace mindspore::kernel
#endif // MINDSPORE_LITE_SRC_EXTENDRT_KERNEL_ASCEND_MODEL_MODEL_PROCESS_H_

View File

@ -0,0 +1,39 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_LITE_SRC_EXTENDRT_KERNEL_ASCEND_SRC_ACL_MODEL_OPTIONS_H_
#define MINDSPORE_LITE_SRC_EXTENDRT_KERNEL_ASCEND_SRC_ACL_MODEL_OPTIONS_H_
#include <string>
#include <set>
#include <utility>
#include <memory>
namespace mindspore::kernel {
namespace acl {
// Options used when running a model through ACL.
struct AclModelOptions {
// Id of the Ascend device to use; defaults to 0.
int32_t device_id;
// NOTE(review): presumably the path of an ACL dump configuration file — not used in the
// visible code, confirm.
std::string dump_cfg_path;
// NOTE(review): presumably the supported dynamic batch sizes — not used in the visible code, confirm.
std::set<uint64_t> batch_size;
// NOTE(review): presumably the supported dynamic image sizes — not used in the visible code, confirm.
std::set<std::pair<uint64_t, uint64_t>> image_size;
AclModelOptions() : device_id(0) {}
};
using AclModelOptionsPtr = std::shared_ptr<AclModelOptions>;
} // namespace acl
} // namespace mindspore::kernel
#endif // MINDSPORE_LITE_SRC_EXTENDRT_KERNEL_ASCEND_SRC_ACL_MODEL_OPTIONS_H_

View File

@ -0,0 +1,80 @@
/**
* Copyright 2022 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "extendrt/kernel/ascend/options/acl_options_parser.h"
#include <utility>
#include <vector>
#include "common/log_adapter.h"
#include "src/common/log_util.h"
#include "src/common/utils.h"
#include "acl/acl_base.h"
#include "acl/acl_rt.h"
namespace mindspore::kernel {
namespace acl {
// NOTE(review): presumably the number of values (height, width) describing an image size; it is
// not referenced in the visible part of this file — confirm it is still needed.
constexpr auto kImageHwNum = 2;
// Fills the options object referenced by *acl_options from the first device info of `ctx`.
// A context without device infos is not an error: a warning is logged and the options are
// left untouched.
// Returns RET_ERROR when *acl_options holds no object or the device info cannot be parsed;
// null `ctx` / `acl_options` are rejected by CHECK_NULL_RETURN.
STATUS AclOptionsParser::ParseAclOptions(const mindspore::Context *ctx, AclModelOptionsPtr *const acl_options) {
  CHECK_NULL_RETURN(ctx);
  CHECK_NULL_RETURN(acl_options);
  auto context = const_cast<mindspore::Context *>(ctx);
  CHECK_NULL_RETURN(context);
  auto device_infos = context->MutableDeviceInfo();
  if (device_infos.size() < 1) {
    MS_LOG(WARNING) << "Context is not set device info, please check.";
    return lite::RET_OK;
  }
  CHECK_NULL_RETURN(device_infos[0]);
  // `acl_options` points at a shared_ptr, while ParseOptions works on the options object
  // itself, so the shared_ptr has to be checked and unwrapped before the call.
  if (*acl_options == nullptr) {
    MS_LOG(ERROR) << "Acl model options is nullptr.";
    return lite::RET_ERROR;
  }
  if (ParseOptions(device_infos[0], acl_options->get()) != lite::RET_OK) {
    MS_LOG(ERROR) << "Parse model options failed.";
    return lite::RET_ERROR;
  }
  return lite::RET_OK;
}
// Reads the device id from an Ascend device info, validates it with CheckDeviceId and
// stores it in acl_options->device_id.
// Returns RET_ERROR when `device_info` is not an Ascend device info or the id is rejected.
STATUS AclOptionsParser::ParseOptions(const std::shared_ptr<DeviceInfoContext> &device_info,
                                      AclModelOptions *acl_options) {
  const auto ascend_info = device_info->Cast<mindspore::AscendDeviceInfo>();
  if (ascend_info == nullptr) {
    MS_LOG(ERROR) << "There is no ascend info.";
    return lite::RET_ERROR;
  }
  int32_t parsed_id = static_cast<int32_t>(ascend_info->GetDeviceID());
  const STATUS check_status = CheckDeviceId(&parsed_id);
  if (check_status == lite::RET_OK) {
    acl_options->device_id = parsed_id;
    return lite::RET_OK;
  }
  MS_LOG(ERROR) << "Check device id failed, device id = " << parsed_id;
  return lite::RET_ERROR;
}
// Checks that *device_id lies in [0, device count).
// A negative id is always rejected. If the device count cannot be queried the upper bound
// is not enforced: a warning is logged and RET_OK is returned.
STATUS AclOptionsParser::CheckDeviceId(int32_t *device_id) {
  CHECK_NULL_RETURN(device_id);
  // The id arrives through a cast from an unsigned value, so anything above INT32_MAX shows
  // up negative here and would slip through a signed upper-bound comparison.
  if (*device_id < 0) {
    MS_LOG(ERROR) << "Current device id " << *device_id << " is invalid, please check the device info of context.";
    return lite::RET_ERROR;
  }
  uint32_t device_count = 0;
  if (aclrtGetDeviceCount(&device_count) != ACL_ERROR_NONE) {
    MS_LOG(WARNING) << "Get device count failed.";
    return lite::RET_OK;
  }
  // The id is known to be non-negative here, so compare in the unsigned domain.
  if (static_cast<uint32_t>(*device_id) >= device_count) {
    MS_LOG(ERROR) << "Current device id " << *device_id << " is larger than max count " << device_count
                  << ",please check the device info of context.";
    return lite::RET_ERROR;
  }
  return lite::RET_OK;
}
} // namespace acl
} // namespace mindspore::kernel

View File

@ -0,0 +1,41 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_LITE_SRC_EXTENDRT_KERNEL_ASCEND_SRC_ACL_OPTIONS_PARSER_H_
#define MINDSPORE_LITE_SRC_EXTENDRT_KERNEL_ASCEND_SRC_ACL_OPTIONS_PARSER_H_
#include <memory>
#include <string>
#include "include/api/context.h"
#include "include/errorcode.h"
#include "extendrt/kernel/ascend/options/acl_model_options.h"
namespace mindspore::kernel {
namespace acl {
using mindspore::lite::STATUS;
// Extracts ACL model options from a mindspore::Context.
class AclOptionsParser {
public:
// Parses the first device info of `ctx` into the options referenced by `acl_options`.
STATUS ParseAclOptions(const mindspore::Context *ctx, AclModelOptionsPtr *const acl_options);
private:
// Reads and validates the device id of an Ascend device info and stores it in `acl_options`.
STATUS ParseOptions(const std::shared_ptr<DeviceInfoContext> &device_info, AclModelOptions *acl_options);
// Validates *device_id against the number of devices reported by ACL.
STATUS CheckDeviceId(int32_t *device_id);
};
} // namespace acl
} // namespace mindspore::kernel
#endif // MINDSPORE_LITE_SRC_EXTENDRT_KERNEL_ASCEND_SRC_ACL_OPTIONS_PARSER_H_

View File

@ -52,16 +52,19 @@ Status SingleOpInferSession::CompileGraph(FuncGraphPtr graph) {
for (const auto &kernel_node : kernel_nodes) {
mindspore::infer::SetKernelInfo(kernel_node);
std::string kernel_name = common::AnfAlgo::GetCNodeName(kernel_node);
std::shared_ptr<kernel::CpuKernelMod> cpu_kernel_mod =
kernel::Factory<kernel::CpuKernelMod>::Instance().Create(kernel_name);
std::shared_ptr<kernel::KernelMod> kernel_mod = kernel::Factory<kernel::KernelMod>::Instance().Create(kernel_name);
MS_LOG(INFO) << "SingleOpInferSession::Kernels " << kernel_name;
auto args = kernel::AbstractArgsFromCNode(kernel_node);
auto ret = cpu_kernel_mod->Init(args.op, args.inputs, args.outputs);
if (kernel_mod == nullptr) {
MS_LOG(EXCEPTION) << "Kernel mod is nullptr, kernel name: " << kernel_name;
}
mindspore::infer::CopyInputWeights(kernel_node, args.inputs);
auto ret = kernel_mod->Init(args.op, args.inputs, args.outputs);
MS_LOG(INFO) << "SingleOpInferSession::Kernels ret " << ret;
if (!ret) {
MS_LOG(EXCEPTION) << "kernel init failed " << kernel_name;
}
if (cpu_kernel_mod->Resize(args.op, args.inputs, args.outputs, kernel::GetKernelDepends(kernel_node)) ==
if (kernel_mod->Resize(args.op, args.inputs, args.outputs, kernel::GetKernelDepends(kernel_node)) ==
kernel::KRET_RESIZE_FAILED) {
MS_LOG(EXCEPTION) << "CPU kernel op [" << kernel_node->fullname_with_scope() << "] Resize failed.";
}
@ -90,10 +93,10 @@ Status SingleOpInferSession::CompileGraph(FuncGraphPtr graph) {
tensor_size = std::max(tensor_size, type_size);
(void)output_size_list.emplace_back(tensor_size);
}
cpu_kernel_mod->SetInputSizeList(input_size_list);
cpu_kernel_mod->SetOutputSizeList(output_size_list);
kernel_mod->SetInputSizeList(input_size_list);
kernel_mod->SetOutputSizeList(output_size_list);
AnfAlgo::SetKernelMod(cpu_kernel_mod, kernel_node.get());
AnfAlgo::SetKernelMod(kernel_mod, kernel_node.get());
}
this->AssignKernelGraphAddress(kernel_graph_);
@ -284,9 +287,29 @@ device::DeviceAddressPtr SingleOpInferSession::CreateDeviceAddress(void *device_
return std::make_shared<InferDeviceAddress>(device_ptr, device_size, format, type_id);
}
// Returns the inputs of kernel_graph_ that are Parameter nodes without a default value, in graph order.
// Parameters that carry a default value, non-Parameter inputs and null entries are left out.
// Raises through MS_EXCEPTION_IF_NULL when kernel_graph_ is null.
std::vector<AnfNodePtr> SingleOpInferSession::GetGraphDataInputs() const {
  MS_EXCEPTION_IF_NULL(kernel_graph_);
  std::vector<AnfNodePtr> data_inputs;
  // Bind by const reference so neither the input list nor each shared pointer is copied while scanning.
  const auto &graph_inputs = kernel_graph_->inputs();
  for (const auto &graph_input : graph_inputs) {
    // Skip null entries instead of dereferencing them.
    if (graph_input == nullptr || !graph_input->isa<Parameter>()) {
      continue;
    }
    const auto parameter = graph_input->cast<ParameterPtr>();
    if (parameter != nullptr && !parameter->has_default()) {
      data_inputs.push_back(graph_input);
    }
  }
  return data_inputs;
}
void SingleOpInferSession::CopyInputs(const std::vector<tensor::TensorPtr> inputs) {
MS_EXCEPTION_IF_NULL(kernel_graph_);
auto graph_inputs = kernel_graph_->inputs();
auto graph_inputs = GetGraphDataInputs();
if (graph_inputs.size() != inputs.size()) {
MS_LOG(ERROR) << "Graph inputs size[" << graph_inputs.size() << "] is not equal to User input size[ "
<< inputs.size() << "].";
return;
}
for (size_t i = 0; i < graph_inputs.size(); i++) {
auto input = inputs[i];
auto graph_input = graph_inputs[i];

View File

@ -49,6 +49,7 @@ class SingleOpInferSession : public InferSession {
device::DeviceAddressPtr CreateDeviceAddress(void *device_ptr, size_t device_size, const string &format,
TypeId type_id) const;
void CopyInputs(const std::vector<tensor::TensorPtr> inputs);
std::vector<AnfNodePtr> GetGraphDataInputs() const;
void CopyOutputs(std::vector<tensor::TensorPtr> *outputs);
private:

View File

@ -35,6 +35,9 @@ using AnfAlgo = mindspore::session::AnfRuntimeAlgorithm;
using mindspore::kernel::KernelBuildInfo;
namespace {
constexpr auto kParamDynamic = "dynamic";
// Minimum size of a CustomAscend node's input list accepted by CopyInputWeights. Typed as size_t because
// it is compared against the result of inputs().size(), which avoids a signed/unsigned comparison.
constexpr size_t kCustomAscendInputNum = 3;
// Kernel name that identifies the custom Ascend operator.
constexpr auto kNameCustomAscend = "CustomAscend";
// Custom op type string for which SetKernelInfoWithMsg calls UpdateCustomKernelBuildInfo with
// is_akg_op == false.
constexpr auto kCustomTypeAscend = "acl_build";
bool IsInputNotCNode(const CNodePtr &kernel_node, size_t input_index) {
auto input_node = common::AnfAlgo::VisitKernel(kernel_node->input(input_index + 1), 0).first;
@ -326,8 +329,9 @@ void UpdateCustomKernelBuildInfo(const CNodePtr &kernel_node, bool is_akg_op) {
GetOutputFormat(kernel_node, &output_formats);
builder->SetOutputsDeviceType(output_types);
builder->SetOutputsFormat(output_formats);
// AnfAlgo::SetSelectKernelBuildInfo(builder->Build(), kernel_node.get());
if (op_name == kNameCustomAscend) {
AnfAlgo::SetSelectKernelBuildInfo(builder->Build(), kernel_node.get());
}
// check reg info if kernel_attr is not null
if (kernel_attr != nullptr) {
std::vector<std::shared_ptr<KernelBuildInfo>> kernel_info_list;
@ -465,6 +469,10 @@ std::pair<std::string, ExceptionType> SetKernelInfoWithMsg(const CNodePtr &kerne
UpdateCustomKernelBuildInfo(kernel_node, true);
return {};
}
if (tp == kCustomTypeAscend) {
UpdateCustomKernelBuildInfo(kernel_node, false);
return {};
}
// If Custom op has not set reg info, then infer info from inputs
if (mindspore::kernel::OpLib::FindOp(op_name, kernel::OpImplyType::kCPU) == nullptr) {
MS_LOG(WARNING) << "Not find operator information for Custom operator[" << op_name << "]. "
@ -535,5 +543,49 @@ void SetKernelInfo(const CNodePtr &kernel_node) {
if (msg.empty()) return;
MS_EXCEPTION(etype) << msg;
}
void CopyInputWeights(const CNodePtr &kernel_node, const std::vector<kernel::KernelTensorPtr> &inputs) {
std::string kernel_name = common::AnfAlgo::GetCNodeName(kernel_node);
if (kernel_name == kNameCustomAscend) {
auto node_input_size = kernel_node->inputs().size();
if (node_input_size < kCustomAscendInputNum) {
MS_LOG(ERROR) << "Input num of custom ascend kernel should larger than " << (kCustomAscendInputNum - 1)
<< ", real num is " << node_input_size;
return;
}
if (node_input_size != inputs.size() + 1) {
MS_LOG(ERROR) << "Input num of custom ascend kernel [" << node_input_size << "]"
<< " is not equal to kernel tensor size[" << (inputs.size() + 1) << "].";
return;
}
auto om_input = kernel_node->input(node_input_size - 1);
if (!om_input->isa<Parameter>()) {
MS_LOG(ERROR) << "Om input is not parameter.";
return;
}
ParameterPtr om_param = om_input->cast<ParameterPtr>();
if (om_param == nullptr || !om_param->has_default()) {
MS_LOG(ERROR) << "Om param is invalid, val= " << om_param;
return;
}
auto tensor = std::static_pointer_cast<tensor::Tensor>(om_param->default_param());
if (tensor == nullptr) {
MS_LOG(ERROR) << "Tensor is nullptr.";
return;
}
if (tensor->data_c() == nullptr || tensor->Size() == 0) {
MS_LOG(ERROR) << "Tensor data is invalid.";
return;
}
auto new_addr = malloc(tensor->Size());
if (new_addr == nullptr) {
MS_LOG(ERROR) << "Malloc failed, size= " << tensor->Size();
return;
}
memcpy(new_addr, tensor->data_c(), tensor->Size());
kernel::AddressPtr addr_ptr = std::make_shared<kernel::Address>(new_addr, tensor->Size());
inputs[inputs.size() - 1]->SetData(addr_ptr);
}
}
} // namespace infer
} // namespace mindspore

View File

@ -26,11 +26,13 @@
#include "ir/anf.h"
#include "ir/dtype/type.h"
#include "include/common/utils/utils.h"
#include "mindspore/ccsrc/kernel/kernel.h"
namespace mindspore {
namespace infer {
// Pair of a type id and a string.
// NOTE(review): the string presumably names the data format -- confirm against the users of DataType.
using DataType = std::pair<TypeId, std::string>;
// Sets the kernel info of `apply_kernel_ptr`.
// NOTE(review): the visible tail of the definition raises via MS_EXCEPTION when a non-empty error
// message was produced and returns normally otherwise -- confirm the rest in the .cc file.
void SetKernelInfo(const CNodePtr &apply_kernel_ptr);
// For a node whose kernel name is "CustomAscend", copies the default tensor of the node's last input
// into newly allocated memory and sets it as the data of the last entry of `inputs`. Other nodes are
// left untouched; validation failures are logged and the function returns without a status.
void CopyInputWeights(const CNodePtr &kernel_node, const std::vector<kernel::KernelTensorPtr> &inputs);
} // namespace infer
} // namespace mindspore

View File

@ -15,7 +15,11 @@
*/
#include "tools/converter/adapter/acl/src/acl_model_process.h"
#ifdef ENABLE_CLOUD_FUSION_INFERENCE
#include "src/extendrt/kernel/ascend/model/acl_env_guard.h"
#else
#include "src/runtime/kernel/ascend/src/acl_env_guard.h"
#endif
#include "src/common/log_util.h"
#include "acl/acl.h"
#include "acl/acl_rt.h"

View File

@ -52,6 +52,8 @@ constexpr auto kInferShapePass = "InferShapePass";
constexpr auto kConstFoldPass = "ConstFoldPass";
constexpr auto kRemoveRedundantOpPass = "RemoveRedundantOpPass";
constexpr auto kDelRedundantTranspose = "DeleteRedundantTranspose";
// Attribute key added to the Custom primitive in SetCustomAttrs, where it is given the value "acl_build".
constexpr auto kFuncType = "func_type";
// Attribute key added to the Custom primitive in SetCustomAttrs, where it is given the value "CustomAscend".
constexpr auto kUniqueName = "uniq_name";
constexpr size_t kDependInputNum = 3;
constexpr size_t kDependFirstInputIdx = 1;
constexpr size_t kTupleGetItemFirstInputIdx = 1;
@ -155,6 +157,10 @@ STATUS AclPassImpl::PreProcGraph(const FuncGraphPtr &func_graph) {
}
STATUS AclPassImpl::PostProcGraph(const FuncGraphPtr &func_graph) {
if (lite::acl::DelRedundantParameter(func_graph) != RET_SUCCESS) {
MS_LOG(ERROR) << "Delete redundant parameters failed.";
return lite::RET_ERROR;
}
if (!user_options_cfg_.offline) {
MS_LOG(DEBUG) << "Online model infer no need to change to nhwc format.";
return lite::RET_OK;
@ -548,6 +554,8 @@ void AclPassImpl::SetCustomAttrs(const std::shared_ptr<ops::Custom> &prim) {
std::vector<uint8_t> output_dim_char(output_dim_str.begin(), output_dim_str.end());
std::map<std::string, std::vector<uint8_t>> attrs = {{lite::acl::kOutputShapes, output_dim_char}};
prim->set_attr(attrs);
prim->AddAttr(kFuncType, api::MakeValue<std::string>("acl_build"));
prim->AddAttr(kUniqueName, api::MakeValue<std::string>("CustomAscend"));
}
CNodePtr AclPassImpl::CreateCustomNode(const FuncGraphPtr &func_graph) {

View File

@ -136,6 +136,9 @@ STATUS DeleteRedundantTranspose::TransTransFusion(const FuncGraphPtr &func_graph
if (!manager_->Replace(cnode, pre_cnode->input(1))) {
MS_LOG(ERROR) << "replace old node failed, please check.";
return lite::RET_ERROR;
} else {
func_graph->DropNode(cnode->input(kInputIndexTwo));
func_graph->DropNode(pre_cnode->input(kInputIndexTwo));
}
}
}