forked from mindspore-Ecosystem/mindspore
upgrade Ascend software suite 29 Apr 22
This commit is contained in:
parent
8be4f542e7
commit
f7dfcb0494
|
@ -1 +1 @@
|
|||
Subproject commit d63e074e5882d0461769cc0893e7e722ff1695f7
|
||||
Subproject commit 2537ca0608f64be64d6d98b048b7ec4e48f4acc3
|
|
@ -430,9 +430,12 @@ if(MODE_ASCEND_ALL OR MODE_ASCEND_ACL)
|
|||
endif()
|
||||
|
||||
if(ENABLE_D)
|
||||
find_library(GE_RUNNER ge_runner ${ASCEND_RUNTIME_PATH} ${ASCEND_CANN_RUNTIME_PATH} ${ASCEND_TOOLKIT_RUNTIME_PATH})
|
||||
find_library(GRAPH graph ${ASCEND_RUNTIME_PATH} ${ASCEND_CANN_RUNTIME_PATH} ${ASCEND_TOOLKIT_RUNTIME_PATH})
|
||||
find_library(HCCL hccl ${ASCEND_RUNTIME_PATH} ${ASCEND_CANN_RUNTIME_PATH} ${ASCEND_TOOLKIT_RUNTIME_PATH})
|
||||
find_library(GE_RUNNER ge_runner ${ASCEND_RUNTIME_PATH} ${ASCEND_CANN_RUNTIME_PATH} ${ASCEND_TOOLKIT_RUNTIME_PATH}
|
||||
${ASCEND_CANN_RUNTIME_NEW_PATH})
|
||||
find_library(GRAPH graph ${ASCEND_RUNTIME_PATH} ${ASCEND_CANN_RUNTIME_PATH} ${ASCEND_TOOLKIT_RUNTIME_PATH}
|
||||
${ASCEND_CANN_RUNTIME_NEW_PATH})
|
||||
find_library(HCCL hccl ${ASCEND_RUNTIME_PATH} ${ASCEND_CANN_RUNTIME_PATH} ${ASCEND_TOOLKIT_RUNTIME_PATH}
|
||||
${ASCEND_CANN_RUNTIME_NEW_PATH})
|
||||
target_link_libraries(mindspore_backend PUBLIC ${GE_RUNNER} ${GRAPH} ${HCCL})
|
||||
endif()
|
||||
|
||||
|
|
|
@ -130,25 +130,25 @@ if(ENABLE_D OR ENABLE_ACL)
|
|||
# 310 mode
|
||||
add_compile_definitions(ENABLE_DVPP_INTERFACE)
|
||||
find_library(acl libascendcl.so ${ASCEND_RUNTIME_PATH} ${ASCEND_TOOLKIT_RUNTIME_PATH}
|
||||
${ASCEND_CANN_RUNTIME_PATH})
|
||||
${ASCEND_CANN_RUNTIME_PATH} ${ASCEND_CANN_RUNTIME_NEW_PATH})
|
||||
find_library(acl_cblas libacl_cblas.so ${ASCEND_RUNTIME_PATH} ${ASCEND_TOOLKIT_RUNTIME_PATH}
|
||||
${ASCEND_CANN_RUNTIME_PATH})
|
||||
${ASCEND_CANN_RUNTIME_PATH} ${ASCEND_CANN_RUNTIME_NEW_PATH})
|
||||
find_library(acl_dvpp libacl_dvpp.so ${ASCEND_RUNTIME_PATH} ${ASCEND_TOOLKIT_RUNTIME_PATH}
|
||||
${ASCEND_CANN_RUNTIME_PATH})
|
||||
${ASCEND_CANN_RUNTIME_PATH} ${ASCEND_CANN_RUNTIME_NEW_PATH})
|
||||
find_library(acl_runtime libruntime.so ${ASCEND_RUNTIME_PATH} ${ASCEND_TOOLKIT_RUNTIME_PATH}
|
||||
${ASCEND_CANN_RUNTIME_PATH})
|
||||
${ASCEND_CANN_RUNTIME_PATH} ${ASCEND_CANN_RUNTIME_NEW_PATH})
|
||||
find_library(ge_compiler libge_compiler.so ${ASCEND_RUNTIME_PATH} ${ASCEND_TOOLKIT_RUNTIME_PATH}
|
||||
${ASCEND_CANN_RUNTIME_PATH})
|
||||
${ASCEND_CANN_RUNTIME_PATH} ${ASCEND_CANN_RUNTIME_NEW_PATH})
|
||||
find_library(libplatform libplatform.so ${ASCEND_RUNTIME_PATH} ${ASCEND_TOOLKIT_RUNTIME_PATH}
|
||||
${ASCEND_CANN_RUNTIME_PATH})
|
||||
${ASCEND_CANN_RUNTIME_PATH} ${ASCEND_CANN_RUNTIME_NEW_PATH})
|
||||
find_library(libcompress libcompress.so ${ASCEND_RUNTIME_PATH} ${ASCEND_TOOLKIT_RUNTIME_PATH}
|
||||
${ASCEND_CANN_RUNTIME_PATH})
|
||||
${ASCEND_CANN_RUNTIME_PATH} ${ASCEND_CANN_RUNTIME_NEW_PATH})
|
||||
find_library(libopskernel libopskernel.so ${ASCEND_RUNTIME_PATH} ${ASCEND_TOOLKIT_RUNTIME_PATH}
|
||||
${ASCEND_CANN_RUNTIME_PATH})
|
||||
${ASCEND_CANN_RUNTIME_PATH} ${ASCEND_CANN_RUNTIME_NEW_PATH})
|
||||
find_library(libaicore_utils libaicore_utils.so ${ASCEND_RUNTIME_PATH} ${ASCEND_TOOLKIT_RUNTIME_PATH}
|
||||
${ASCEND_CANN_RUNTIME_PATH})
|
||||
${ASCEND_CANN_RUNTIME_PATH} ${ASCEND_CANN_RUNTIME_NEW_PATH})
|
||||
find_library(libaicpu_engine_common libaicpu_engine_common.so ${ASCEND_RUNTIME_PATH}
|
||||
${ASCEND_TOOLKIT_RUNTIME_PATH} ${ASCEND_CANN_RUNTIME_PATH})
|
||||
${ASCEND_TOOLKIT_RUNTIME_PATH} ${ASCEND_CANN_RUNTIME_PATH} ${ASCEND_CANN_RUNTIME_NEW_PATH})
|
||||
|
||||
target_link_libraries(mindspore_shared_lib PRIVATE -Wl,--no-as-needed graph ${ge_compiler}
|
||||
${acl_retr} ${acl_cblas} ${acl_dvpp} ${acl_runtime} ${libplatform} ${libcompress} ${libopskernel}
|
||||
|
|
|
@ -42,7 +42,9 @@ if(ENABLE_D)
|
|||
set(ASCEND_TOOLKIT_RUNTIME_PATH ${ASCEND_PATH}/ascend-toolkit/latest/fwkacllib/lib64)
|
||||
set(ASCEND_RUNTIME_PATH ${ASCEND_PATH}/fwkacllib/lib64)
|
||||
set(ASCEND_CANN_RUNTIME_PATH ${ASCEND_PATH}/latest/fwkacllib/lib64)
|
||||
find_library(HCCL hccl ${ASCEND_RUNTIME_PATH} ${ASCEND_TOOLKIT_RUNTIME_PATH} ${ASCEND_CANN_RUNTIME_PATH})
|
||||
set(ASCEND_CANN_RUNTIME_NEW_PATH ${ASCEND_PATH}/latest/lib64)
|
||||
find_library(HCCL hccl ${ASCEND_RUNTIME_PATH} ${ASCEND_TOOLKIT_RUNTIME_PATH} ${ASCEND_CANN_RUNTIME_PATH}
|
||||
${ASCEND_CANN_RUNTIME_NEW_PATH})
|
||||
file(GLOB_RECURSE ASCEND_COLLECTIVE_LIST "distribute/mpi_collective_group.cc"
|
||||
"distribute/collective_group_wrapper.cc")
|
||||
set_property(SOURCE ${ASCEND_COLLECTIVE_LIST}
|
||||
|
|
|
@ -86,11 +86,11 @@ void AicpuTask::Distribute() {
|
|||
rtArgsEx_t argsInfo = {};
|
||||
argsInfo.args = args_;
|
||||
argsInfo.argsSize = args_size;
|
||||
rt_ret = rtCpuKernelLaunchWithFlagV2(reinterpret_cast<const void *>(task_info_->so_name().data()),
|
||||
reinterpret_cast<const void *>(task_info_->kernel_name().data()), 1, &argsInfo,
|
||||
nullptr, stream_, cpu_flag);
|
||||
rt_ret = rtCpuKernelLaunchWithFlag(reinterpret_cast<const void *>(task_info_->so_name().data()),
|
||||
reinterpret_cast<const void *>(task_info_->kernel_name().data()), 1, &argsInfo,
|
||||
nullptr, stream_, cpu_flag);
|
||||
if (rt_ret != RT_ERROR_NONE) {
|
||||
MS_LOG(EXCEPTION) << "Call rt api rtCpuKernelLaunchWithFlagV2 failed, ret: " << rt_ret;
|
||||
MS_LOG(EXCEPTION) << "Call rt api rtCpuKernelLaunchWithFlag failed, ret: " << rt_ret;
|
||||
}
|
||||
|
||||
MS_LOG(INFO) << "Distribute AicpuTask end.";
|
||||
|
|
|
@ -92,7 +92,7 @@ void TbeTask::Distribute() {
|
|||
rtArgsEx_t args_info = {};
|
||||
args_info.args = args_;
|
||||
args_info.argsSize = args_size;
|
||||
rt_ret = rtKernelLaunchWithFlagV2(stub_func_, task_info_->block_dim(), &args_info, nullptr, stream_, dump_flag);
|
||||
rt_ret = rtKernelLaunchWithFlag(stub_func_, task_info_->block_dim(), &args_info, nullptr, stream_, dump_flag);
|
||||
if (rt_ret != RT_ERROR_NONE) {
|
||||
MS_LOG(EXCEPTION) << "Call rt api rtKernelLaunch failed, ret: " << rt_ret << " mem size " << args_size;
|
||||
}
|
||||
|
|
|
@ -57,13 +57,8 @@ uint64_t GetProfilingModule() {
|
|||
}
|
||||
|
||||
Status ProfilingManager::PluginInit() const {
|
||||
if (prof_cb_.msprofReporterCallback == nullptr) {
|
||||
MS_LOG(ERROR) << "MsprofReporterCallback callback is nullptr.";
|
||||
return PROF_FAILED;
|
||||
}
|
||||
int32_t ret = prof_cb_.msprofReporterCallback(static_cast<uint32_t>(MsprofReporterModuleId::MSPROF_MODULE_FRAMEWORK),
|
||||
static_cast<uint32_t>(MsprofReporterCallbackType::MSPROF_REPORTER_INIT),
|
||||
nullptr, 0);
|
||||
int32_t ret = MsprofReportData(static_cast<uint32_t>(MsprofReporterModuleId::MSPROF_MODULE_FRAMEWORK),
|
||||
static_cast<uint32_t>(MsprofReporterCallbackType::MSPROF_REPORTER_INIT), nullptr, 0);
|
||||
if (ret != UintToInt(PROF_SUCCESS)) {
|
||||
MS_LOG(ERROR) << "MsprofReporter init failed, ret: " << ret;
|
||||
return PROF_FAILED;
|
||||
|
@ -72,13 +67,9 @@ Status ProfilingManager::PluginInit() const {
|
|||
}
|
||||
|
||||
void ProfilingManager::PluginUnInit() const {
|
||||
if (prof_cb_.msprofReporterCallback == nullptr) {
|
||||
MS_LOG(ERROR) << "MsprofReporterCallback callback is nullptr.";
|
||||
return;
|
||||
}
|
||||
int32_t cb_ret = prof_cb_.msprofReporterCallback(
|
||||
static_cast<uint32_t>(MsprofReporterModuleId::MSPROF_MODULE_FRAMEWORK),
|
||||
static_cast<uint32_t>(MsprofReporterCallbackType::MSPROF_REPORTER_UNINIT), nullptr, 0);
|
||||
int32_t cb_ret =
|
||||
MsprofReportData(static_cast<uint32_t>(MsprofReporterModuleId::MSPROF_MODULE_FRAMEWORK),
|
||||
static_cast<uint32_t>(MsprofReporterCallbackType::MSPROF_REPORTER_UNINIT), nullptr, 0);
|
||||
if (cb_ret != 0) {
|
||||
MS_LOG(WARNING) << "profiling plugin uninit failed, ret:%d" << cb_ret;
|
||||
}
|
||||
|
@ -130,7 +121,7 @@ bool ProfilingManager::InitProfiling(const std::string &profiling_path, uint32_t
|
|||
}
|
||||
|
||||
bool ProfilingManager::ProfRegisterCtrlCallback() const {
|
||||
rtError_t rt_ret = rtProfRegisterCtrlCallback(GE, CtrlCallbackHandle);
|
||||
rtError_t rt_ret = MsprofRegisterCallback(GE, CtrlCallbackHandle);
|
||||
if (rt_ret != RT_ERROR_NONE) {
|
||||
MS_LOG(ERROR) << "Call rtProfRegisterCtrlCallback failed.";
|
||||
return false;
|
||||
|
@ -161,14 +152,9 @@ void ProfilingManager::ReportErrorMessage() const {
|
|||
}
|
||||
|
||||
Status ProfilingManager::CallMsprofReport(const NotNull<ReporterData *> reporter_data) const {
|
||||
if (prof_cb_.msprofReporterCallback == nullptr) {
|
||||
MS_LOG(ERROR) << "MsprofReporterCallback callback is nullptr.";
|
||||
return PROF_FAILED;
|
||||
}
|
||||
int32_t ret =
|
||||
prof_cb_.msprofReporterCallback(static_cast<int32_t>(MsprofReporterModuleId::MSPROF_MODULE_FRAMEWORK),
|
||||
static_cast<int32_t>(MsprofReporterCallbackType::MSPROF_REPORTER_REPORT),
|
||||
static_cast<void *>(reporter_data.get()), sizeof(ReporterData));
|
||||
int32_t ret = MsprofReportData(static_cast<int32_t>(MsprofReporterModuleId::MSPROF_MODULE_FRAMEWORK),
|
||||
static_cast<int32_t>(MsprofReporterCallbackType::MSPROF_REPORTER_REPORT),
|
||||
static_cast<void *>(reporter_data.get()), sizeof(ReporterData));
|
||||
|
||||
if (ret != UintToInt(PROF_SUCCESS)) {
|
||||
ReportErrorMessage();
|
||||
|
@ -249,9 +235,9 @@ void ProfilingManager::QueryHashId(const int32_t &device_id, const std::string &
|
|||
hash_data.dataLen = src_str.size();
|
||||
hash_data.data = reinterpret_cast<unsigned char *>(const_cast<char *>(src_str.c_str()));
|
||||
|
||||
const int32_t ret = prof_cb_.msprofReporterCallback(
|
||||
static_cast<int32_t>(MsprofReporterModuleId::MSPROF_MODULE_FRAMEWORK),
|
||||
static_cast<int32_t>(MsprofReporterCallbackType::MSPROF_REPORTER_HASH), &hash_data, sizeof(MsprofHashData));
|
||||
const int32_t ret = MsprofReportData(static_cast<int32_t>(MsprofReporterModuleId::MSPROF_MODULE_FRAMEWORK),
|
||||
static_cast<int32_t>(MsprofReporterCallbackType::MSPROF_REPORTER_HASH),
|
||||
&hash_data, sizeof(MsprofHashData));
|
||||
if (ret != UintToInt(PROF_SUCCESS)) {
|
||||
ReportErrorMessage();
|
||||
MS_LOG(EXCEPTION) << "[Profiling] Query hash id of long string failed, src string is " << src_str.c_str()
|
||||
|
|
|
@ -38,7 +38,6 @@ namespace device {
|
|||
namespace ascend {
|
||||
struct MsprofCallback {
|
||||
MsprofCtrlCallback msprofCtrlCallback;
|
||||
MsprofSetDeviceCallback msprofSetDeviceCallback;
|
||||
MsprofReporterCallback msprofReporterCallback;
|
||||
};
|
||||
|
||||
|
@ -68,7 +67,6 @@ class ProfilingManager {
|
|||
const struct MsprofCallback &GetMsprofCallback() const { return prof_cb_; }
|
||||
void SetMsprofCtrlCallback(MsprofCtrlCallback func) { prof_cb_.msprofCtrlCallback = func; }
|
||||
void SetMsprofReporterCallback(MsprofReporterCallback func) { prof_cb_.msprofReporterCallback = func; }
|
||||
void SetMsprofSetDeviceCallback(MsprofSetDeviceCallback func) { prof_cb_.msprofSetDeviceCallback = func; }
|
||||
Status GetProfConf(NotNull<MsprofGeOptions *> prof);
|
||||
Status ProfCommandHandle(ProfCommandHandleType type);
|
||||
Status ProfHandleInit();
|
||||
|
|
|
@ -6,10 +6,12 @@ set(MINDSPORE_RPATH ${MINDSPORE_RPATH}:/usr/local/Ascend/nnae/latest/fwkacllib/l
|
|||
set(MINDSPORE_RPATH ${MINDSPORE_RPATH}:/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/lib64)
|
||||
set(MINDSPORE_RPATH ${MINDSPORE_RPATH}:/usr/local/Ascend/fwkacllib/lib64)
|
||||
set(MINDSPORE_RPATH ${MINDSPORE_RPATH}:/usr/local/Ascend/latest/fwkacllib/lib64)
|
||||
set(MINDSPORE_RPATH ${MINDSPORE_RPATH}:/usr/local/Ascend/latest/lib64)
|
||||
set(MINDSPORE_RPATH ${MINDSPORE_RPATH}:/usr/local/Ascend/nnae/latest/fwkacllib/lib64/plugin/opskernel)
|
||||
set(MINDSPORE_RPATH ${MINDSPORE_RPATH}:/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/lib64/plugin/opskernel)
|
||||
set(MINDSPORE_RPATH ${MINDSPORE_RPATH}:/usr/local/Ascend/fwkacllib/lib64/plugin/opskernel)
|
||||
set(MINDSPORE_RPATH ${MINDSPORE_RPATH}:/usr/local/Ascend/latest/fwkacllib/lib64/plugin/opskernel)
|
||||
set(MINDSPORE_RPATH ${MINDSPORE_RPATH}:/usr/local/Ascend/latest/lib64/plugin/opskernel)
|
||||
set_target_properties(hccl_plugin PROPERTIES INSTALL_RPATH ${MINDSPORE_RPATH})
|
||||
|
||||
if(DEFINED ENV{D_LINK_PATH})
|
||||
|
@ -30,18 +32,23 @@ else()
|
|||
set(ASCEND_PATH /usr/local/Ascend)
|
||||
endif()
|
||||
set(ASCEND_RUNTIME_PATH ${ASCEND_PATH}/fwkacllib/lib64)
|
||||
set(ASCEND_CANN_RUNTIME_NEW_PATH ${ASCEND_PATH}/latest/lib64)
|
||||
set(ASCEND_PLUGIN_PATH ${ASCEND_RUNTIME_PATH}/plugin/opskernel)
|
||||
set(ASCEND_PLUGIN_NEW_PATH ${ASCEND_CANN_RUNTIME_NEW_PATH}/plugin/opskernel)
|
||||
set(ASCEND_TOOLKIT_RUNTIME_PATH ${ASCEND_PATH}/ascend-toolkit/latest/fwkacllib/lib64)
|
||||
set(ASCEND_TOOLKIT_PLUGIN_PATH ${ASCEND_TOOLKIT_RUNTIME_PATH}/plugin/opskernel)
|
||||
set(ASCEND_CANN_RUNTIME_PATH ${ASCEND_PATH}/latest/fwkacllib/lib64)
|
||||
set(ASCEND_CANN_PLUGIN_PATH ${ASCEND_CANN_RUNTIME_PATH}/plugin/opskernel)
|
||||
endif()
|
||||
|
||||
find_library(HCCL hccl ${ASCEND_RUNTIME_PATH} ${ASCEND_TOOLKIT_RUNTIME_PATH} ${ASCEND_CANN_RUNTIME_PATH})
|
||||
find_library(REGISTER register ${ASCEND_RUNTIME_PATH} ${ASCEND_TOOLKIT_RUNTIME_PATH} ${ASCEND_CANN_RUNTIME_PATH})
|
||||
find_library(HCCL hccl ${ASCEND_RUNTIME_PATH} ${ASCEND_TOOLKIT_RUNTIME_PATH} ${ASCEND_CANN_RUNTIME_PATH}
|
||||
${ASCEND_CANN_RUNTIME_NEW_PATH})
|
||||
find_library(REGISTER register ${ASCEND_RUNTIME_PATH} ${ASCEND_TOOLKIT_RUNTIME_PATH} ${ASCEND_CANN_RUNTIME_PATH}
|
||||
${ASCEND_CANN_RUNTIME_NEW_PATH})
|
||||
find_library(HCCL_ADPTER hcom_graph_adaptor ${ASCEND_RUNTIME_PATH} ${ASCEND_TOOLKIT_RUNTIME_PATH}
|
||||
${ASCEND_CANN_RUNTIME_PATH})
|
||||
find_library(HCCL_RA ra ${ASCEND_RUNTIME_PATH} ${ASCEND_TOOLKIT_RUNTIME_PATH} ${ASCEND_CANN_RUNTIME_PATH})
|
||||
${ASCEND_CANN_RUNTIME_PATH} ${ASCEND_CANN_RUNTIME_NEW_PATH})
|
||||
find_library(HCCL_RA ra ${ASCEND_RUNTIME_PATH} ${ASCEND_TOOLKIT_RUNTIME_PATH} ${ASCEND_CANN_RUNTIME_PATH}
|
||||
${ASCEND_CANN_RUNTIME_NEW_PATH})
|
||||
find_library(HCCL_BUILDER hcom_opskernel_builder ${ASCEND_PLUGIN_PATH} ${ASCEND_TOOLKIT_PLUGIN_PATH}
|
||||
${ASCEND_CANN_PLUGIN_PATH})
|
||||
${ASCEND_CANN_PLUGIN_PATH} ${ASCEND_PLUGIN_NEW_PATH})
|
||||
target_link_libraries(hccl_plugin -Wl,--no-as-needed ${HCCL} ${HCCL_ADPTER} ${REGISTER} ${HCCL_BUILDER} ${HCCL_RA})
|
||||
|
|
|
@ -165,9 +165,9 @@ bool AicpuOpKernelMod::Launch(const std::vector<AddressPtr> &inputs, const std::
|
|||
rtArgsEx_t argsInfo = {};
|
||||
argsInfo.args = args_.data();
|
||||
argsInfo.argsSize = static_cast<uint32_t>(args_.length());
|
||||
if (rtCpuKernelLaunchWithFlagV2(reinterpret_cast<const void *>(node_so_.c_str()),
|
||||
reinterpret_cast<const void *>(node_name_.c_str()), 1, &argsInfo, nullptr, stream_,
|
||||
flag) != RT_ERROR_NONE) {
|
||||
if (rtCpuKernelLaunchWithFlag(reinterpret_cast<const void *>(node_so_.c_str()),
|
||||
reinterpret_cast<const void *>(node_name_.c_str()), 1, &argsInfo, nullptr, stream_,
|
||||
flag) != RT_ERROR_NONE) {
|
||||
MS_LOG(ERROR) << "Aicpu op launch failed!";
|
||||
|
||||
return false;
|
||||
|
|
|
@ -164,9 +164,9 @@ bool DynamicAicpuOpKernelMod::Launch(const std::vector<AddressPtr> &inputs, cons
|
|||
rtArgsEx_t argsInfo = {};
|
||||
argsInfo.args = args_.data();
|
||||
argsInfo.argsSize = static_cast<uint32_t>(args_.length());
|
||||
ret = rtCpuKernelLaunchWithFlagV2(reinterpret_cast<const void *>(node_so_.c_str()),
|
||||
reinterpret_cast<const void *>(node_name_.c_str()), 1, &argsInfo, nullptr, stream_,
|
||||
flag);
|
||||
ret =
|
||||
rtCpuKernelLaunchWithFlag(reinterpret_cast<const void *>(node_so_.c_str()),
|
||||
reinterpret_cast<const void *>(node_name_.c_str()), 1, &argsInfo, nullptr, stream_, flag);
|
||||
if (ret != RT_ERROR_NONE) {
|
||||
MS_LOG(ERROR) << "Aicpu op launch failed!";
|
||||
return false;
|
||||
|
|
|
@ -279,17 +279,16 @@ bool DynamicTbeKernelMod::Launch(const std::vector<AddressPtr> &inputs, const st
|
|||
auto args_size = static_cast<uint32_t>(UlongToUint(sizeof(void *)) * runtimeargs.size());
|
||||
auto node_info = cnode->fullname_with_scope();
|
||||
if (kernel_pack_->kernel_json_info().has_kernel_list) {
|
||||
const auto dev_func = std::to_string(tiling_key_);
|
||||
const auto kernel_info = node_info + "/" + std::to_string(tiling_key_);
|
||||
// cppcheck-suppress unreadVariable
|
||||
auto lock = device::KernelRuntime::LockRuntime(stream_ptr);
|
||||
rtArgsEx_t args_info = {};
|
||||
args_info.args = runtimeargs.data();
|
||||
args_info.argsSize = args_size;
|
||||
auto ret = rtKernelLaunchWithHandleV2(handle_, dev_func.c_str(), block_dim_, &args_info, l2ctrl, stream_ptr,
|
||||
kernel_info.c_str());
|
||||
auto ret =
|
||||
rtKernelLaunchWithHandle(handle_, tiling_key_, block_dim_, &args_info, l2ctrl, stream_ptr, kernel_info.c_str());
|
||||
if (ret != RT_ERROR_NONE) {
|
||||
MS_LOG(ERROR) << "Call runtime rtKernelLaunchWithHandleV2 error. Node info: " << node_info;
|
||||
MS_LOG(ERROR) << "Call runtime rtKernelLaunchWithHandle error. Node info: " << node_info;
|
||||
return false;
|
||||
}
|
||||
} else {
|
||||
|
|
|
@ -175,7 +175,6 @@ void OpTilingCalculateAdapter::ConvertAtomicCompileInfo(const CNodePtr &node, ::
|
|||
(void)::ge::AttrUtils::SetStr(*(*op_desc), ATOMIC_COMPILE_INFO_KEY, atomic_info_key);
|
||||
(void)::ge::AttrUtils::SetStr(*(*op_desc), ATOMIC_COMPILE_INFO_JSON, atomic_compile_info);
|
||||
}
|
||||
|
||||
// clean output
|
||||
if (has_output) {
|
||||
vector<int64_t> output_indexs;
|
||||
|
|
|
@ -81,7 +81,8 @@ void ReduceEltwiseFusionPass::MatchSingleFusionPattern(const session::KernelGrap
|
|||
auto cnode = node->cast<CNodePtr>();
|
||||
MS_EXCEPTION_IF_NULL(cnode);
|
||||
if (AnfAlgo::GetKernelType(cnode) == KernelType::TBE_KERNEL &&
|
||||
AnfAlgo::GetFusionType(cnode) == kernel::FusionType::ELEMWISE && cnode->inputs().size() == ELTWISE_INPUT_SIZE) {
|
||||
AnfAlgo::GetFusionType(cnode) == kernel::FusionType::ELEMWISE && cnode->inputs().size() == ELTWISE_INPUT_SIZE &&
|
||||
common::AnfAlgo::GetCNodeName(cnode) != kCastOpName) {
|
||||
MatchReduceEltwise(cnode, kernel_graph, candidate_fusion);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -479,6 +479,7 @@ if(MSLITE_ENABLE_ACL)
|
|||
set(ASCEND_RUNTIME_PATH ${ASCEND_PATH}/fwkacllib/lib64)
|
||||
set(ASCEND_TOOLKIT_RUNTIME_PATH ${ASCEND_PATH}/ascend-toolkit/latest/fwkacllib/lib64)
|
||||
set(ASCEND_CANN_RUNTIME_PATH ${ASCEND_PATH}/latest/fwkacllib/lib64)
|
||||
set(ASCEND_CANN_RUNTIME_NEW_PATH ${ASCEND_PATH}/latest/lib64)
|
||||
endif()
|
||||
|
||||
set(PKG_NAME_PREFIX mindspore-lite-${VERSION_STR})
|
||||
|
|
|
@ -422,7 +422,8 @@ add_dependencies(lite_src_mid fbs_src)
|
|||
if(MSLITE_ENABLE_ACL AND NOT MSLITE_ENABLE_CLOUD_FUSION_INFERENCE)
|
||||
include_directories(${TOP_DIR}/graphengine/inc/external)
|
||||
add_subdirectory(extendrt/kernel/ascend)
|
||||
link_directories(${ASCEND_RUNTIME_PATH} ${ASCEND_TOOLKIT_RUNTIME_PATH} ${ASCEND_CANN_RUNTIME_PATH})
|
||||
link_directories(${ASCEND_RUNTIME_PATH} ${ASCEND_TOOLKIT_RUNTIME_PATH} ${ASCEND_CANN_RUNTIME_PATH}
|
||||
${ASCEND_CANN_RUNTIME_NEW_PATH})
|
||||
endif()
|
||||
|
||||
add_library(mindspore-lite SHARED $<TARGET_OBJECTS:lite_src_mid>)
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
include_directories(${TOP_DIR}/graphengine/inc/external)
|
||||
|
||||
find_library(ge_graph libgraph.so ${ASCEND_RUNTIME_PATH} ${ASCEND_TOOLKIT_RUNTIME_PATH} ${ASCEND_CANN_RUNTIME_PATH})
|
||||
find_library(ge_graph libgraph.so ${ASCEND_RUNTIME_PATH} ${ASCEND_TOOLKIT_RUNTIME_PATH} ${ASCEND_CANN_RUNTIME_PATH}
|
||||
${ASCEND_CANN_RUNTIME_NEW_PATH})
|
||||
|
||||
aux_source_directory(src ACL_SRC)
|
||||
add_library(ascend_kernel_mid OBJECT ${ACL_SRC})
|
||||
|
|
|
@ -103,7 +103,8 @@ if(MSLITE_ENABLE_ACL)
|
|||
include(${TOP_DIR}/cmake/dependency_graphengine.cmake)
|
||||
endif()
|
||||
add_subdirectory(adapter/acl)
|
||||
link_directories(${ASCEND_RUNTIME_PATH} ${ASCEND_TOOLKIT_RUNTIME_PATH} ${ASCEND_CANN_RUNTIME_PATH})
|
||||
link_directories(${ASCEND_RUNTIME_PATH} ${ASCEND_TOOLKIT_RUNTIME_PATH} ${ASCEND_CANN_RUNTIME_PATH}
|
||||
${ASCEND_CANN_RUNTIME_NEW_PATH})
|
||||
endif()
|
||||
|
||||
file(GLOB CXX_API_SRCS
|
||||
|
|
|
@ -21,7 +21,7 @@ import traceback
|
|||
from datetime import datetime
|
||||
|
||||
from tbe.common.rl_bank.bank_manager import set_current_op_name
|
||||
from tbe.common.repository_manager.interface import cann_kb_unload, cann_kb_load
|
||||
from tbe.common.repository_manager.interface import cann_kb_finalize, cann_kb_init
|
||||
from tbe.common.rl_bank.bank_cfg import LocalLock
|
||||
from te.platform.cce_conf import te_set_version
|
||||
from te.platform.cce_policy import set_L1_info
|
||||
|
@ -74,32 +74,25 @@ def _tune_init(job: TbeJob):
|
|||
return res
|
||||
|
||||
|
||||
def _cann_kb_load(job: TbeJob):
|
||||
def _cann_kb_init(job: TbeJob):
|
||||
"""
|
||||
database load
|
||||
database init
|
||||
:param job:
|
||||
:return:
|
||||
"""
|
||||
soc_version = job.soc_version
|
||||
core_num = job.core_num
|
||||
op_bank_path = job.op_bank_path
|
||||
sys_config = {"soc_version": job.soc_version, "core_num": job.core_num}
|
||||
load_config = {"op_bank_path": job.op_bank_path}
|
||||
kb_type = None
|
||||
res = cann_kb_load(soc_version, core_num, op_bank_path, kb_type)
|
||||
res = cann_kb_init(sys_config, load_config, kb_type)
|
||||
return res
|
||||
|
||||
|
||||
def _cann_kb_unload(job: TbeJob):
|
||||
def _cann_kb_finalize():
|
||||
"""
|
||||
database unload
|
||||
:param job:
|
||||
database finalize
|
||||
:return:
|
||||
"""
|
||||
if job is None:
|
||||
return 0
|
||||
soc_version = job.soc_version
|
||||
core_num = job.core_num
|
||||
kb_type = None
|
||||
res = cann_kb_unload(soc_version, core_num, kb_type)
|
||||
res = cann_kb_finalize()
|
||||
return res
|
||||
|
||||
|
||||
|
@ -258,7 +251,7 @@ def tbe_initialize(job: TbeJob):
|
|||
local_lock = LocalLock(lock_file)
|
||||
try:
|
||||
local_lock.lock()
|
||||
res = _cann_kb_load(job)
|
||||
res = _cann_kb_init(job)
|
||||
if res == 1:
|
||||
job.error("Cann kb load failed")
|
||||
res = _parallel_compilation_init(job)
|
||||
|
@ -668,7 +661,7 @@ def tbe_finalize(auto_tiling_mode, offline_tune, job: TbeJob):
|
|||
if "RL" in auto_tiling_mode or offline_tune:
|
||||
from schedule_search.rl_online_tune import rl_tune_deinit
|
||||
rl_tune_deinit()
|
||||
res = _cann_kb_unload(job)
|
||||
res = _cann_kb_finalize()
|
||||
if res == 1:
|
||||
job.error("Cann kb unload failed")
|
||||
return False
|
||||
|
|
|
@ -214,6 +214,7 @@ def get_options_info(job_content):
|
|||
options["deviceId"] = job_content["SocInfo"]["deviceId"]
|
||||
options["autoTilingMode"] = job_content["SocInfo"]["autoTilingMode"]
|
||||
options["op_impl_mode_list"] = job_content["SocInfo"]["op_impl_mode_list"]
|
||||
options["status_check"] = "false"
|
||||
return options
|
||||
|
||||
|
||||
|
|
|
@ -33,15 +33,16 @@ gru_v2_hidden_grad_cell_op_info = TBERegOp("GRUV2HiddenGradCell") \
|
|||
.input(5, "reset", False, "required", "all") \
|
||||
.input(6, "new", False, "required", "all") \
|
||||
.input(7, "hidden_new", False, "required", "all") \
|
||||
.input(8, "seq_mask", False, "optional", "all") \
|
||||
.output(0, "dh_preh", False, "required", "all") \
|
||||
.output(1, "dgate_h", False, "required", "all") \
|
||||
.output(2, "dnt_x", False, "required", "all") \
|
||||
.dtype_format(DataType.F32_FracNZ, DataType.F32_FracNZ, DataType.F32_FracNZ, DataType.F32_FracNZ,
|
||||
DataType.F32_FracNZ, DataType.F32_FracNZ, DataType.F32_FracNZ, DataType.F32_FracNZ,
|
||||
DataType.F32_FracNZ, DataType.F32_FracNZ, DataType.F32_FracNZ) \
|
||||
DataType.F32_FracNZ, DataType.F32_FracNZ, DataType.F32_FracNZ, DataType.F16_FracNZ) \
|
||||
.dtype_format(DataType.F16_FracNZ, DataType.F16_FracNZ, DataType.F16_FracNZ, DataType.F16_FracNZ,
|
||||
DataType.F16_FracNZ, DataType.F16_FracNZ, DataType.F16_FracNZ, DataType.F16_FracNZ,
|
||||
DataType.F16_FracNZ, DataType.F16_FracNZ, DataType.F16_FracNZ) \
|
||||
DataType.F16_FracNZ, DataType.F16_FracNZ, DataType.F16_FracNZ, DataType.F16_FracNZ) \
|
||||
.get_op_info()
|
||||
|
||||
|
||||
|
|
|
@ -124,6 +124,8 @@ class FlopsParser:
|
|||
step_idx = 0
|
||||
aicore_file_doc = os.path.join(self._input_dir, "data")
|
||||
source_files = self._get_aicore_files(aicore_file_doc)
|
||||
if not source_files:
|
||||
return
|
||||
# parse all sliced aicore files.
|
||||
for source_file in source_files:
|
||||
source_file = validate_and_normalize_path(source_file)
|
||||
|
@ -418,7 +420,8 @@ class FlopsParser:
|
|||
"""Get aicore files."""
|
||||
aicore_files = self._search_file(profiler_dir)
|
||||
if not aicore_files:
|
||||
raise ProfilerPathErrorException('The aicore file does not exist.')
|
||||
logger.warning("Aicore file does not exist.")
|
||||
return[]
|
||||
|
||||
return aicore_files
|
||||
|
||||
|
|
|
@ -215,7 +215,7 @@ class AscendEnvChecker(EnvChecker):
|
|||
"""ascend environment check"""
|
||||
|
||||
def __init__(self):
|
||||
self.version = ["1.81"]
|
||||
self.version = ["1.82"]
|
||||
atlas_nnae_version = "/usr/local/Ascend/nnae/latest/fwkacllib/version.info"
|
||||
atlas_toolkit_version = "/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/version.info"
|
||||
hisi_fwk_version = "/usr/local/Ascend/latest/fwkacllib/version.info"
|
||||
|
|
|
@ -63,7 +63,7 @@ def thor_end():
|
|||
thor_cost /= 4
|
||||
print(f"resnet thor_loss: {thor_loss}, thor_cost: {thor_cost}")
|
||||
assert thor_loss < 7
|
||||
assert thor_cost < 30
|
||||
assert thor_cost < 40
|
||||
for i in range(4):
|
||||
shutil.rmtree(os.path.join(sh_path, f"train_parallel{i+4}"))
|
||||
|
||||
|
|
|
@ -469,7 +469,7 @@ def onp_average(x):
|
|||
return a, b, c, d, e, f, g, h, i
|
||||
|
||||
|
||||
@pytest.mark.level0
|
||||
@pytest.mark.level1
|
||||
@pytest.mark.platform_arm_ascend_training
|
||||
@pytest.mark.platform_x86_ascend_training
|
||||
@pytest.mark.platform_x86_gpu_training
|
||||
|
@ -1283,7 +1283,7 @@ def onp_kron(x, y):
|
|||
return onp.kron(x, y)
|
||||
|
||||
|
||||
@pytest.mark.level0
|
||||
@pytest.mark.level1
|
||||
@pytest.mark.platform_arm_ascend_training
|
||||
@pytest.mark.platform_x86_ascend_training
|
||||
@pytest.mark.platform_x86_gpu_training
|
||||
|
@ -1303,7 +1303,7 @@ def test_kron():
|
|||
match_res(mnp.kron, onp.kron, x, y)
|
||||
|
||||
|
||||
@pytest.mark.level0
|
||||
@pytest.mark.level1
|
||||
@pytest.mark.platform_arm_ascend_training
|
||||
@pytest.mark.platform_x86_ascend_training
|
||||
@pytest.mark.platform_x86_gpu_training
|
||||
|
@ -1437,7 +1437,7 @@ def onp_diff(input_array):
|
|||
return a, b, c, d, e, f, g
|
||||
|
||||
|
||||
@pytest.mark.level0
|
||||
@pytest.mark.level1
|
||||
@pytest.mark.platform_arm_ascend_training
|
||||
@pytest.mark.platform_x86_ascend_training
|
||||
@pytest.mark.platform_x86_gpu_training
|
||||
|
@ -1743,7 +1743,7 @@ def test_convolve():
|
|||
match_all_arrays(mnp_res, onp_res)
|
||||
|
||||
|
||||
@pytest.mark.level0
|
||||
@pytest.mark.level1
|
||||
@pytest.mark.platform_arm_ascend_training
|
||||
@pytest.mark.platform_x86_ascend_training
|
||||
@pytest.mark.platform_x86_gpu_training
|
||||
|
@ -2003,7 +2003,7 @@ def test_mean():
|
|||
run_multi_test(mnp_mean, onp_mean, test_case.scalars, error=3)
|
||||
|
||||
|
||||
@pytest.mark.level0
|
||||
@pytest.mark.level1
|
||||
@pytest.mark.platform_arm_ascend_training
|
||||
@pytest.mark.platform_x86_ascend_training
|
||||
@pytest.mark.platform_x86_gpu_training
|
||||
|
@ -2034,7 +2034,7 @@ def test_multi_dot():
|
|||
match_all_arrays(mnp.multi_dot(mnp_arrays[1:-1]), onp.linalg.multi_dot(arrays[1:-1]))
|
||||
|
||||
|
||||
@pytest.mark.level0
|
||||
@pytest.mark.level1
|
||||
@pytest.mark.platform_arm_ascend_training
|
||||
@pytest.mark.platform_x86_ascend_training
|
||||
@pytest.mark.platform_x86_gpu_training
|
||||
|
@ -2148,7 +2148,7 @@ def test_bincount():
|
|||
onp.bincount(x, weights, minlength=25), error=3)
|
||||
|
||||
|
||||
@pytest.mark.level0
|
||||
@pytest.mark.level1
|
||||
@pytest.mark.platform_arm_ascend_training
|
||||
@pytest.mark.platform_x86_ascend_training
|
||||
@pytest.mark.platform_x86_gpu_training
|
||||
|
@ -2472,7 +2472,7 @@ def test_result_type():
|
|||
assert actual == expected
|
||||
|
||||
|
||||
@pytest.mark.level0
|
||||
@pytest.mark.level1
|
||||
@pytest.mark.platform_arm_ascend_training
|
||||
@pytest.mark.platform_x86_ascend_training
|
||||
@pytest.mark.platform_x86_gpu_training
|
||||
|
@ -2555,7 +2555,7 @@ def test_ravel_multi_index():
|
|||
match_array(actual.asnumpy(), expected, error=5)
|
||||
|
||||
|
||||
@pytest.mark.level0
|
||||
@pytest.mark.level1
|
||||
@pytest.mark.platform_arm_ascend_training
|
||||
@pytest.mark.platform_x86_ascend_training
|
||||
@pytest.mark.platform_x86_gpu_training
|
||||
|
|
|
@ -90,3 +90,22 @@ ACL_FUNC_VISIBILITY aclprofConfig *aclprofCreateConfig(uint32_t *deviceIdList, u
|
|||
}
|
||||
|
||||
ACL_FUNC_VISIBILITY aclError aclprofDestroyConfig(const aclprofConfig *profilerConfig) { return ACL_SUCCESS; }
|
||||
|
||||
/**
|
||||
* @name MsprofRegisterCallback
|
||||
* @brief register callback to profiling
|
||||
* @param moduleId [IN] module Id
|
||||
* @param handle [IN] the pointer of callback
|
||||
*/
|
||||
MSVP_PROF_API int32_t MsprofRegisterCallback(uint32_t moduleId, ProfCommandHandle handle) { return 0; }
|
||||
|
||||
/*
|
||||
* @name MsprofReportData
|
||||
* @brief start reporter/stop reporter/report data
|
||||
* @param moduleId [IN] enum profReporterModuleId
|
||||
* @param type [IN] enum profReporterCallbackType
|
||||
* @param data [IN] data (nullptr on INIT/UNINIT)
|
||||
* @param len [IN] data size (0 on INIT/UNINIT)
|
||||
* @return enum MsprofErrorCode
|
||||
*/
|
||||
MSVP_PROF_API int32_t MsprofReportData(uint32_t moduleId, uint32_t type, void* data, uint32_t len) { return 0; }
|
||||
|
|
|
@ -188,12 +188,6 @@ RTS_API rtError_t rtLabelCreateEx(rtLabel_t *label, rtStream_t stream) { return
|
|||
RTS_API rtError_t rtLabelCreateExV2(rtLabel_t *lbl, rtModel_t mdl, rtStream_t stm) { return RT_ERROR_NONE; }
|
||||
|
||||
RTS_API rtError_t rtCpuKernelLaunchWithFlag(const void *soName, const void *kernelName, uint32_t blockDim,
|
||||
const rtArgsEx_t *argsInfo, rtSmDesc_t *smDesc, rtStream_t stm,
|
||||
uint32_t flags) {
|
||||
return RT_ERROR_NONE;
|
||||
}
|
||||
|
||||
RTS_API rtError_t rtCpuKernelLaunchWithFlagV2(const void *soName, const void *kernelName, uint32_t blockDim,
|
||||
const rtArgsEx_t *argsInfo, rtSmDesc_t *smDesc, rtStream_t stm,
|
||||
uint32_t flags) {
|
||||
return RT_ERROR_NONE;
|
||||
|
@ -210,11 +204,6 @@ RTS_API rtError_t rtProfilerTraceEx(uint64_t id, uint64_t modelId, uint16_t tagI
|
|||
}
|
||||
|
||||
RTS_API rtError_t rtKernelLaunchWithFlag(const void *stubFunc, uint32_t blockDim, rtArgsEx_t *argsInfo,
|
||||
rtSmDesc_t *smDesc, rtStream_t stm, uint32_t flags) {
|
||||
return RT_ERROR_NONE;
|
||||
}
|
||||
|
||||
RTS_API rtError_t rtKernelLaunchWithFlagV2(const void *stubFunc, uint32_t blockDim, rtArgsEx_t *argsInfo,
|
||||
rtSmDesc_t *smDesc, rtStream_t stm, uint32_t flags) {
|
||||
return RT_ERROR_NONE;
|
||||
}
|
||||
|
@ -225,14 +214,8 @@ RTS_API rtError_t rtProfRegisterCtrlCallback(uint32_t moduleId, rtProfCtrlHandle
|
|||
|
||||
RTS_API rtError_t rtGetRtCapability(rtFeatureType_t, int32_t, int64_t *) { return RT_ERROR_NONE; }
|
||||
|
||||
RTS_API rtError_t rtKernelLaunchWithHandle(void *hdl, const void *kernelInfoExt, uint32_t blockDim,
|
||||
RTS_API rtError_t rtKernelLaunchWithHandle(void *hdl, const uint64_t tilingKey, uint32_t blockDim,
|
||||
rtArgsEx_t *argsInfo, rtSmDesc_t *smDesc, rtStream_t stm,
|
||||
const void *kernelInfo) {
|
||||
return RT_ERROR_NONE;
|
||||
}
|
||||
|
||||
RTS_API rtError_t rtKernelLaunchWithHandleV2(void *hdl, const void *kernelInfoExt, uint32_t blockDim,
|
||||
rtArgsEx_t *argsInfo, rtSmDesc_t *smDesc, rtStream_t stm,
|
||||
const void *kernelInfo) {
|
||||
return RT_ERROR_NONE;
|
||||
}
|
||||
|
|
|
@ -1 +1 @@
|
|||
1.7.0
|
||||
1.8.0
|
Loading…
Reference in New Issue