!26310 MindSpore supports loading custom AICPU kernels.

Merge pull request !26310 from linqingke/aicpu
This commit is contained in:
i-robot 2021-11-18 08:09:26 +00:00 committed by Gitee
commit 9d6248194e
12 changed files with 568 additions and 32 deletions

View File

@ -31,6 +31,7 @@
#include "proto/node_def.pb.h"
#include "backend/session/anf_runtime_algorithm.h"
#include "backend/kernel_compiler/aicpu/aicpu_util.h"
#include "backend/kernel_compiler/aicpu/aicpu_kernel_load.h"
#include "backend/session/kernel_graph.h"
#include "backend/kernel_compiler/common_utils.h"
#include "backend/kernel_compiler/oplib/oplib.h"
@ -423,6 +424,11 @@ KernelModPtr AicpuOpBuild(const std::shared_ptr<AnfNode> &anf_node) {
if (!SetIOSize(anf_node, kernel_mod_ptr)) {
MS_LOG(EXCEPTION) << "Set input output size list failed.";
}
if (!AicpuOpKernelLoad::GetInstance().LoadAicpuKernelSo(anf_node, kernel_mod_ptr)) {
MS_LOG(EXCEPTION) << "Aicpu kernel so load failed. task is " << anf_node->fullname_with_scope();
}
return kernel_mod_ptr;
}
} // namespace kernel

View File

@ -0,0 +1,373 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "backend/kernel_compiler/aicpu/aicpu_kernel_load.h"
#include <dlfcn.h>
#include <unistd.h>
#include <climits>
#include <fstream>
#include <ios>
#include <map>
#include <mutex>
#include <string>
#include <utility>
#include <vector>
#include "runtime/kernel.h"
#include "runtime/mem.h"
#include "runtime/context.h"
#include "utils/utils.h"
#include "utils/file_utils.h"
#include "backend/session/anf_runtime_algorithm.h"
namespace mindspore {
namespace kernel {
// Resolve the real path of the custom aicpu so `so_name` located under
// `bin_folder_path` and store it into *bin_file_path.
// Resolved paths are cached in so_name_and_realpath_map_ so each so name is
// resolved at most once. Returns false if the path cannot be resolved or the
// file does not exist.
bool AicpuOpKernelLoad::GetBinaryFileName(const std::string &so_name, const std::string &bin_folder_path,
                                          std::string *bin_file_path) {
  MS_EXCEPTION_IF_NULL(bin_file_path);
  const auto &iter = so_name_and_realpath_map_.find(so_name);
  if (iter != so_name_and_realpath_map_.end()) {
    *bin_file_path = iter->second;
    // Fix: log the cached path itself, not the address of the output pointer.
    MS_LOG(INFO) << "so " << so_name << " has bin file path " << *bin_file_path;
    return true;
  }

  // Normalize the folder path: default to the current directory, and make sure
  // it ends with a separator before appending the so name.
  std::string bin_file_name(bin_folder_path);
  if (bin_file_name.empty()) {
    bin_file_name = "./";
  } else if (bin_file_name.back() != '/') {
    bin_file_name.append("/");
  }
  bin_file_name += so_name;

  auto real_file_path = FileUtils::GetRealPath(bin_file_name.c_str());
  if (!real_file_path.has_value()) {
    MS_LOG(ERROR) << "Get real path failed, path=" << bin_file_name;
    return false;
  }
  // Bind by const reference: GetRealPath returns an optional owning the string.
  const auto &real_file_path_value = real_file_path.value();
  if (access(real_file_path_value.c_str(), F_OK) == -1) {
    MS_LOG(ERROR) << "Kernel so path:" << real_file_path_value << " is not existed!";
    return false;
  }

  *bin_file_path = real_file_path_value;
  so_name_and_realpath_map_[so_name] = *bin_file_path;
  return true;
}
bool AicpuOpKernelLoad::ReadBytesFromBinaryFile(const std::string &file_name, std::vector<char> *buffer) const {
std::ifstream file(file_name.c_str(), std::ios::binary | std::ios::ate);
if (!file.is_open()) {
MS_LOG(ERROR) << "Open file [" << file_name << "] failed";
return false;
}
std::streamsize size = file.tellg();
if (size <= 0) {
file.close();
MS_LOG(ERROR) << "Empty file [" << file_name << "], please check this file.";
return false;
}
if (size > INT_MAX) {
file.close();
MS_LOG(ERROR) << "File [" << file_name << "] size [" << size << "] is out of limit[" << INT_MAX << "]";
return false;
}
file.seekg(0, std::ios::beg);
buffer->resize(size);
file.read(buffer->data(), size);
file.close();
return true;
}
bool AicpuOpKernelLoad::GetSoNeedLoadPath(const std::string &so_name, std::string *file_path) const {
MS_EXCEPTION_IF_NULL(file_path);
Dl_info dl_info;
if (dladdr(reinterpret_cast<void *>(const_cast<AicpuOpKernelLoad *>(this)), &dl_info) == 0) {
MS_LOG(ERROR) << "Get dladdr failed!";
return false;
}
std::string cust_kernel_so_path(dl_info.dli_fname);
auto pos = cust_kernel_so_path.find_last_of('/');
if (cust_kernel_so_path.empty() || pos == std::string::npos) {
MS_LOG(ERROR) << "Current path [" << cust_kernel_so_path << "] is invalid.";
return false;
}
auto real_cust_kernel_so_path = cust_kernel_so_path.substr(0, pos) + "/lib/";
if (real_cust_kernel_so_path.size() > PATH_MAX) {
MS_LOG(ERROR) << "Current path [" << real_cust_kernel_so_path << "] is too long.";
return false;
}
*file_path = real_cust_kernel_so_path;
return true;
}
// Locate the custom so `so_name` on disk, read its binary content, and insert
// an OpKernelBin entry for it into *so_name_with_bin_info.
// Returns false if the so cannot be located or read.
bool AicpuOpKernelLoad::PackageBinaryFile(const std::string &so_name,
                                          std::map<std::string, OpKernelBinPtr> *so_name_with_bin_info) {
  std::string bin_folder_path;
  bool ret = GetSoNeedLoadPath(so_name, &bin_folder_path);
  if (!ret) {
    MS_LOG(ERROR) << "GetSoNeedLoadPath failed.";
    return false;
  }

  std::string bin_file_path;
  ret = GetBinaryFileName(so_name, bin_folder_path, &bin_file_path);
  if (!ret) {
    MS_LOG(ERROR) << "GetBinaryFileName failed.";
    return false;
  }

  std::vector<char> buffer;
  ret = ReadBytesFromBinaryFile(bin_file_path, &buffer);
  if (!ret) {
    MS_LOG(ERROR) << "ReadBytesFromBinaryFile failed.";
    return false;
  }

  // Note: std::make_shared never returns nullptr (it throws std::bad_alloc on
  // failure), so the previous null-check after this call was dead code.
  OpKernelBinPtr cust_aicpu_kernel_ptr = std::make_shared<OpKernelBin>(so_name, std::move(buffer));
  so_name_with_bin_info->insert({so_name, cust_aicpu_kernel_ptr});
  return true;
}
// Register the custom aicpu so required by `node` (if any) for later batch
// loading to the device. The so binary is read from disk once per runtime
// context (keyed by the current rtContext) and cached in cust_aicpu_so_.
// Default (built-in) aicpu sos and non-custom ops are accepted as no-ops.
// Thread-safe; returns false if the so cannot be packaged.
bool AicpuOpKernelLoad::LoadAicpuKernelSo(const AnfNodePtr &node,
                                          const std::shared_ptr<AicpuOpKernelMod> &kernel_mod_ptr) {
  std::lock_guard<std::mutex> lock(cust_aicpu_mutex_);
  MS_EXCEPTION_IF_NULL(node);
  MS_EXCEPTION_IF_NULL(kernel_mod_ptr);
  CNodePtr cnode = node->cast<CNodePtr>();
  MS_EXCEPTION_IF_NULL(cnode);

  if (!AnfAlgo::HasNodeAttr(kAttrCustAicpu, cnode)) {
    MS_LOG(INFO) << "Current aicpu ops:" << cnode->fullname_with_scope() << " isn't a custom ops.";
    return true;
  }
  std::string so_name = "lib" + AnfAlgo::GetNodeAttr<std::string>(cnode, kAttrCustAicpu) + ".so";
  if (so_name == kLibAicpuKernelSoName || so_name == kLibCpuKernelSoName) {
    MS_LOG(INFO) << "Aicpu so:" << so_name << " is default so.";
    return true;
  }
  kernel_mod_ptr->SetCustSo(so_name);

  rtContext_t rt_cur_ctx = nullptr;
  auto rt_error = rtCtxGetCurrent(&rt_cur_ctx);
  if (rt_error != RT_ERROR_NONE) {
    MS_LOG(ERROR) << "Call rtCtxGetCurrent failed, ret = 0x" << rt_error;
    return false;
  }
  // Use the current context as the resource key so each context gets its own
  // cache of loaded so binaries.
  uintptr_t resource_id = reinterpret_cast<uintptr_t>(rt_cur_ctx);
  auto it = cust_aicpu_so_.find(resource_id);
  if (it != cust_aicpu_so_.end() && it->second.find(so_name) != it->second.end()) {
    MS_LOG(INFO) << "Cust aicpu so:" << so_name << " has been loaded.";
    return true;
  }

  std::map<std::string, OpKernelBinPtr> so_name_with_bin_info;
  if (!PackageBinaryFile(so_name, &so_name_with_bin_info)) {
    MS_LOG(ERROR) << "Package binary file failed.";
    return false;
  }
  // Merge into the per-context cache. operator[] creates the inner map when
  // this is the first custom so for the context, which collapses the previous
  // three-branch tail into one code path.
  auto &so_map = cust_aicpu_so_[resource_id];
  so_map.insert(so_name_with_bin_info.begin(), so_name_with_bin_info.end());
  // Fix: add the missing space before "success" in the log message.
  MS_LOG(INFO) << "Load cust aicpu so:" << so_name << " success, resource id:" << resource_id << ".";
  return true;
}
// Copy every cached custom aicpu so for `resource_id` (binary data and so
// name) into device HBM memory, then build and copy the packed
// BatchLoadOpFromBufArgs structure that the device-side batch-load kernel
// consumes. *batch_args receives the device address of that structure.
// Every device allocation is appended to *allocated_mem immediately after it
// succeeds, so on failure the caller can still free the partial allocations
// (see FreeDeviceMemory). Returns false on any rt* API failure.
bool AicpuOpKernelLoad::CacheBinaryFileToDevice(const uintptr_t &resource_id, std::vector<void *> *allocated_mem,
                                                void **batch_args) {
  auto it = cust_aicpu_so_.find(resource_id);
  if (it == cust_aicpu_so_.end()) {
    MS_LOG(ERROR) << "Context id:" << resource_id << " is invalid.";
    return false;
  }
  rtError_t status;
  std::vector<CustAicpuSoBuf> v_cust_so;
  // Stage 1: for each cached so, copy its binary and its name to the device
  // and record the device addresses in a CustAicpuSoBuf descriptor.
  for (const auto &it_so : it->second) {
    const auto &so_name = it_so.first;
    const void *aicpu_data = it_so.second->GetBinData();
    // NOTE(review): size_t -> uint32_t narrowing; safe in practice because
    // ReadBytesFromBinaryFile rejects files larger than INT_MAX.
    uint32_t aicpu_data_length = it_so.second->GetBinDataSize();
    void *d_aicpu_data = nullptr;
    void *d_so_name = nullptr;
    status = rtMalloc(&d_aicpu_data, aicpu_data_length, RT_MEMORY_HBM);
    if (status != RT_ERROR_NONE) {
      MS_LOG(ERROR) << "Call rtMalloc failed, size:" << aicpu_data_length << ", ret = 0x" << status;
      return false;
    }
    allocated_mem->emplace_back(d_aicpu_data);
    status = rtMalloc(&d_so_name, so_name.size(), RT_MEMORY_HBM);
    if (status != RT_ERROR_NONE) {
      MS_LOG(ERROR) << "Call rtMalloc failed, size:" << so_name.size() << ", ret = 0x" << status;
      return false;
    }
    allocated_mem->emplace_back(d_so_name);
    status = rtMemcpy(d_aicpu_data, aicpu_data_length, aicpu_data, aicpu_data_length, RT_MEMCPY_HOST_TO_DEVICE);
    if (status != RT_ERROR_NONE) {
      MS_LOG(ERROR) << "Call rtMemcpy failed, ret = 0x" << status;
      return false;
    }
    status = rtMemcpy(d_so_name, so_name.size(), reinterpret_cast<const void *>(so_name.c_str()), so_name.size(),
                      RT_MEMCPY_HOST_TO_DEVICE);
    if (status != RT_ERROR_NONE) {
      MS_LOG(ERROR) << "Call rtMemcpy failed, ret = 0x" << status;
      return false;
    }
    // Device pointers travel to the device as 64-bit integers, hence the
    // uintptr_t round-trip casts.
    CustAicpuSoBuf cust_aicpu_so_buf;
    cust_aicpu_so_buf.kernelSoBuf = static_cast<uint64_t>(reinterpret_cast<uintptr_t>(d_aicpu_data));
    cust_aicpu_so_buf.kernelSoBufLen = aicpu_data_length;
    cust_aicpu_so_buf.kernelSoName = static_cast<uint64_t>(reinterpret_cast<uintptr_t>(d_so_name));
    cust_aicpu_so_buf.kernelSoNameLen = so_name.size();
    v_cust_so.emplace_back(cust_aicpu_so_buf);
  }
  // Stage 2: copy the descriptor array itself to the device.
  void *args = nullptr;
  uint32_t args_size = sizeof(CustAicpuSoBuf) * v_cust_so.size();
  status = rtMalloc(&args, args_size, RT_MEMORY_HBM);
  if (status != RT_ERROR_NONE) {
    MS_LOG(ERROR) << "Call rtMalloc failed, size:" << args_size << ", ret = 0x" << status;
    return false;
  }
  allocated_mem->emplace_back(args);
  status = rtMemcpy(args, args_size, v_cust_so.data(), args_size, RT_MEMCPY_HOST_TO_DEVICE);
  if (status != RT_ERROR_NONE) {
    MS_LOG(ERROR) << "Call rtMemcpy failed, ret = 0x" << status;
    return false;
  }
  // Stage 3: copy the top-level argument block (count + device address of the
  // descriptor array) to the device; its device address is the kernel's arg.
  BatchLoadOpFromBufArgs batch_cust_so;
  batch_cust_so.soNum = v_cust_so.size();
  batch_cust_so.args = static_cast<uint64_t>(reinterpret_cast<uintptr_t>(args));
  uint32_t batch_args_size = sizeof(BatchLoadOpFromBufArgs);
  status = rtMalloc(batch_args, batch_args_size, RT_MEMORY_HBM);
  if (status != RT_ERROR_NONE) {
    MS_LOG(ERROR) << "Call rtMalloc failed, size:" << batch_args_size << ", ret = 0x" << status;
    return false;
  }
  allocated_mem->emplace_back(*batch_args);
  status = rtMemcpy(*batch_args, batch_args_size, static_cast<void *>(&batch_cust_so), batch_args_size,
                    RT_MEMCPY_HOST_TO_DEVICE);
  if (status != RT_ERROR_NONE) {
    MS_LOG(ERROR) << "Call rtMemcpy failed, ret = 0x" << status;
    return false;
  }
  return true;
}
// Push all custom aicpu so binaries cached for the current runtime context to
// the device and synchronously launch the device-side "batchLoadsoFrombuf"
// kernel so the device registers them. No-op (returns true) when nothing was
// cached for this process or this context. Thread-safe.
// Device buffers and the created stream are retained in allocated_mem_list_ /
// stream_list_ and released later by FreeDeviceMemory().
bool AicpuOpKernelLoad::LaunchAicpuKernelSo() {
  std::lock_guard<std::mutex> lock(cust_aicpu_mutex_);
  if (cust_aicpu_so_.empty()) {
    return true;
  }
  rtContext_t rt_cur_ctx = nullptr;
  rtError_t status = RT_ERROR_NONE;
  status = rtCtxGetCurrent(&rt_cur_ctx);
  if (status != RT_ERROR_NONE) {
    MS_LOG(ERROR) << "Call rtCtxGetCurrent failed, ret = 0x" << status;
    return false;
  }
  // Use the current context as the resource key, matching LoadAicpuKernelSo.
  uintptr_t resource_id = reinterpret_cast<uintptr_t>(rt_cur_ctx);
  auto it = cust_aicpu_so_.find(resource_id);
  if (it == cust_aicpu_so_.end()) {
    MS_LOG(INFO) << "Cust aicpu so map is empty, context id:" << resource_id;
    return true;
  }
  std::vector<void *> allocated_mem;
  void *batch_args = nullptr;
  uint32_t batch_args_size = sizeof(BatchLoadOpFromBufArgs);
  bool ret = CacheBinaryFileToDevice(resource_id, &allocated_mem, &batch_args);
  // Record the allocations BEFORE checking ret so that partial allocations
  // from a failed copy are still freed by FreeDeviceMemory().
  allocated_mem_list_.emplace_back(std::move(allocated_mem));
  if (!ret) {
    MS_LOG(ERROR) << "CacheBinaryFileToDevice is failed.";
    return false;
  }
  rtStream_t stream = nullptr;
  status = rtStreamCreate(&stream, 0);
  if (status != RT_ERROR_NONE) {
    MS_LOG(ERROR) << "Call rtStreamCreate failed, ret = 0x" << status;
    return false;
  }
  stream_list_.emplace_back(stream);
  // Launch the "batchLoadsoFrombuf" event on the device and wait for it to
  // finish so the sos are registered before any op that needs them runs.
  std::string load_event(kBatchLoadBuf);
  status = rtCpuKernelLaunch(nullptr, load_event.c_str(), 1, batch_args, batch_args_size, nullptr, stream);
  if (status != RT_ERROR_NONE) {
    MS_LOG(ERROR) << "Call rtCpuKernelLaunch failed, ret = 0x" << status;
    return false;
  }
  status = rtStreamSynchronize(stream);
  if (status != RT_ERROR_NONE) {
    MS_LOG(ERROR) << "Call rtStreamSynchronize failed, ret = 0x" << status;
    return false;
  }
  MS_LOG(INFO) << "Aicpu kernel so launch success.";
  return true;
}
void AicpuOpKernelLoad::FreeDeviceMemory() {
for (auto allocated_mem : allocated_mem_list_) {
for (auto mem : allocated_mem) {
if (mem == nullptr) {
continue;
}
auto rt_error = rtFree(mem);
if (rt_error != RT_ERROR_NONE) {
MS_LOG(EXCEPTION) << "Call rtFree failed, ret = 0x" << rt_error;
}
}
}
for (auto stream : stream_list_) {
if (stream != nullptr) {
auto rt_error = rtStreamDestroy(stream);
if (rt_error != RT_ERROR_NONE) {
MS_LOG(EXCEPTION) << "Call rtStreamDestroy failed, ret = 0x" << rt_error;
}
}
}
so_name_and_realpath_map_.clear();
cust_aicpu_so_.clear();
}
} // namespace kernel
} // namespace mindspore

View File

@ -0,0 +1,78 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_AICPU_AICPU_KERNEL_LOAD_H_
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_AICPU_AICPU_KERNEL_LOAD_H_
#include <map>
#include <memory>
#include <mutex>
#include <string>
#include <vector>
#include "runtime/base.h"
#include "base/base.h"
#include "ir/anf.h"
#include "backend/kernel_compiler/aicpu/aicpu_util.h"
#include "backend/kernel_compiler/aicpu/aicpu_kernel_mod.h"
namespace mindspore {
namespace kernel {
// Name of the device-side kernel that batch-loads custom aicpu so files.
constexpr auto kBatchLoadBuf = "batchLoadsoFrombuf";
// The structs below are copied to the device verbatim, so they are packed
// (1-byte aligned) to match the device-side layout exactly.
#pragma pack(push, 1)
// Device location of one custom aicpu so: its binary blob and its file name.
struct CustAicpuSoBuf {
  uint64_t kernelSoBuf;      // device address of the so binary data
  uint32_t kernelSoBufLen;   // byte length of the so binary data
  uint64_t kernelSoName;     // device address of the so file name
  uint32_t kernelSoNameLen;  // byte length of the so file name
};
// Argument block for the batch-load kernel: a counted array of CustAicpuSoBuf.
struct BatchLoadOpFromBufArgs {
  uint32_t soNum;  // number of CustAicpuSoBuf entries
  uint64_t args;   // device address of the CustAicpuSoBuf array
};
#pragma pack(pop)
// Process-wide singleton that reads custom aicpu kernel so files from disk,
// caches them per runtime context, uploads them to the device, and launches
// the device-side batch-load kernel. All public entry points are guarded by
// cust_aicpu_mutex_.
class AicpuOpKernelLoad {
 public:
  AicpuOpKernelLoad() = default;
  ~AicpuOpKernelLoad() = default;

  // Fix: the singleton owns device memory and streams; copying it would lead
  // to double-free in FreeDeviceMemory, so copy operations are deleted.
  AicpuOpKernelLoad(const AicpuOpKernelLoad &) = delete;
  AicpuOpKernelLoad &operator=(const AicpuOpKernelLoad &) = delete;

  static AicpuOpKernelLoad &GetInstance() {
    static AicpuOpKernelLoad instance;
    return instance;
  }

  // Upload all cached sos for the current context and run the batch-load
  // kernel on the device.
  bool LaunchAicpuKernelSo();
  // Register the custom so required by `node` (reads it from disk and caches
  // it for the current context).
  bool LoadAicpuKernelSo(const AnfNodePtr &node, const std::shared_ptr<AicpuOpKernelMod> &kernel_mod_ptr);
  // Free all retained device buffers/streams and clear the caches.
  void FreeDeviceMemory();

 private:
  bool GetBinaryFileName(const std::string &so_name, const std::string &bin_folder_path, std::string *bin_file_path);
  bool ReadBytesFromBinaryFile(const std::string &file_name, std::vector<char> *buffer) const;
  bool GetSoNeedLoadPath(const std::string &so_name, std::string *file_path) const;
  bool PackageBinaryFile(const std::string &so_name, std::map<std::string, OpKernelBinPtr> *so_name_with_bin_info);
  bool CacheBinaryFileToDevice(const uintptr_t &resource_id, std::vector<void *> *allocated_mem, void **batch_args);

  // so name -> resolved real path cache.
  std::map<std::string, std::string> so_name_and_realpath_map_;
  // runtime-context key -> (so name -> so binary) cache.
  std::map<uintptr_t, std::map<std::string, OpKernelBinPtr>> cust_aicpu_so_;
  std::mutex cust_aicpu_mutex_;
  // Streams and device buffers retained until FreeDeviceMemory().
  std::vector<rtStream_t> stream_list_;
  std::vector<std::vector<void *>> allocated_mem_list_;
};
} // namespace kernel
} // namespace mindspore
#endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_AICPU_AICPU_KERNEL_LOAD_H_

View File

@ -36,9 +36,6 @@ using HostDynamicKernel = mindspore::device::ascend::HostDynamicKernel;
namespace mindspore {
namespace kernel {
constexpr auto AICPU_OPS_SO_NAME = "libaicpu_kernels.so";
constexpr auto CUST_AICPU_OPS_SO_NAME = "libcpu_kernels.so";
AicpuOpKernelMod::AicpuOpKernelMod() : anf_node_(nullptr) {}
AicpuOpKernelMod::~AicpuOpKernelMod() {
@ -63,6 +60,10 @@ void AicpuOpKernelMod::SetOutputList(const std::vector<int64_t> &outputList) { o
void AicpuOpKernelMod::SetNodeDef(const std::string &nodeDef) { (void)node_def_str_.assign(nodeDef); }
void AicpuOpKernelMod::SetExtInfo(const std::string &ext_info) { ext_info_ = ext_info; }
void AicpuOpKernelMod::SetNodeName(const std::string &node_name) { node_name_ = node_name; }
void AicpuOpKernelMod::SetCustSo(const std::string &cust_so) {
node_so_ = cust_so;
cust_kernel_ = true;
}
void AicpuOpKernelMod::SetAnfNode(const mindspore::AnfNodePtr &anf_node) {
MS_EXCEPTION_IF_NULL(anf_node);
anf_node_ = anf_node;
@ -72,15 +73,17 @@ void AicpuOpKernelMod::CreateCpuKernelInfo(const std::vector<AddressPtr> &inputs
const std::vector<AddressPtr> &outputs) {
MS_LOG(INFO) << "CreateCpuKernelInfoOffline start";
if (kCustAiCpuKernelOps.find(node_name_) != kCustAiCpuKernelOps.end()) {
node_so_ = CUST_AICPU_OPS_SO_NAME;
node_name_ = kCustRunApi;
if (!cust_kernel_) {
if (kCpuKernelOps.find(node_name_) != kCpuKernelOps.end()) {
node_so_ = kLibCpuKernelSoName;
node_name_ = kCpuRunApi;
} else if (kCacheKernelOps.find(node_name_) != kCacheKernelOps.end()) {
node_so_ = AICPU_OPS_SO_NAME;
node_name_ = kCustRunApi;
node_so_ = kLibAicpuKernelSoName;
node_name_ = kCpuRunApi;
} else {
if (node_so_ != CUST_AICPU_OPS_SO_NAME) {
node_so_ = AICPU_OPS_SO_NAME;
if (node_so_ != kLibCpuKernelSoName) {
node_so_ = kLibAicpuKernelSoName;
}
}
}
// InputOutputAddr
@ -149,12 +152,16 @@ bool AicpuOpKernelMod::Launch(const std::vector<AddressPtr> &inputs, const std::
if (node_name_ == kStack) {
node_name_ = kPack;
}
auto flag = RT_KERNEL_DEFAULT;
if (cust_kernel_) {
flag = RT_KERNEL_CUSTOM_AICPU;
}
MS_LOG(INFO) << "Aicpu launch, node_so_:" << node_so_ << ", node name:" << node_name_
<< ", args_size:" << args_.length();
if (rtCpuKernelLaunch(reinterpret_cast<const void *>(node_so_.c_str()),
if (rtCpuKernelLaunchWithFlag(reinterpret_cast<const void *>(node_so_.c_str()),
reinterpret_cast<const void *>(node_name_.c_str()), 1,
reinterpret_cast<const void *>(args_.data()), static_cast<uint32_t>(args_.length()), nullptr,
stream_) != RT_ERROR_NONE) {
reinterpret_cast<const void *>(args_.data()), static_cast<uint32_t>(args_.length()),
nullptr, stream_, flag) != RT_ERROR_NONE) {
MS_LOG(ERROR) << "Aicpu op launch failed!";
return false;
@ -168,15 +175,17 @@ std::vector<TaskInfoPtr> AicpuOpKernelMod::GenTask(const std::vector<AddressPtr>
MS_LOG(INFO) << "AicpuOpKernelMod GenTask start";
stream_id_ = stream_id;
if (kCustAiCpuKernelOps.find(node_name_) != kCustAiCpuKernelOps.end()) {
node_so_ = CUST_AICPU_OPS_SO_NAME;
node_name_ = kCustRunApi;
if (!cust_kernel_) {
if (kCpuKernelOps.find(node_name_) != kCpuKernelOps.end()) {
node_so_ = kLibCpuKernelSoName;
node_name_ = kCpuRunApi;
} else if (kCacheKernelOps.find(node_name_) != kCacheKernelOps.end()) {
node_so_ = AICPU_OPS_SO_NAME;
node_name_ = kCustRunApi;
node_so_ = kLibAicpuKernelSoName;
node_name_ = kCpuRunApi;
} else {
if (node_so_ != CUST_AICPU_OPS_SO_NAME) {
node_so_ = AICPU_OPS_SO_NAME;
if (node_so_ != kLibCpuKernelSoName) {
node_so_ = kLibAicpuKernelSoName;
}
}
}
std::vector<void *> input_data_addrs;
@ -197,7 +206,7 @@ std::vector<TaskInfoPtr> AicpuOpKernelMod::GenTask(const std::vector<AddressPtr>
AicpuTaskInfoPtr task_info_ptr = std::make_shared<mindspore::ge::model_runner::AicpuTaskInfo>(
unique_name_, stream_id, node_so_, node_name_, node_def_str_, ext_info_, input_data_addrs, output_data_addrs,
NeedDump());
NeedDump(), cust_kernel_);
MS_LOG(INFO) << "AicpuOpKernelMod GenTask end";
return {task_info_ptr};

View File

@ -39,6 +39,7 @@ class AicpuOpKernelMod : public AscendKernelMod {
void SetNodeDef(const std::string &nodeDef);
void SetExtInfo(const std::string &ext_info);
void SetNodeName(const std::string &node_name);
void SetCustSo(const std::string &cust_so);
/**
* @brief Build AICPU Engine kernel structure, and allocate device memory for offline task generate
@ -56,6 +57,7 @@ class AicpuOpKernelMod : public AscendKernelMod {
const std::vector<size_t> &GetWorkspaceSizeList() const override;
private:
bool cust_kernel_{false};
std::string args_;
std::string node_def_str_;
std::string node_name_;

View File

@ -17,6 +17,8 @@
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_AICPU_AICPU_UTIL_H_
#include <cstdint>
#include <utility>
#include <memory>
#include <vector>
#include <map>
#include <set>
@ -24,6 +26,8 @@
#include "backend/kernel_compiler/kernel.h"
namespace mindspore {
namespace kernel {
constexpr auto kLibAicpuKernelSoName = "libaicpu_kernels.so";
constexpr auto kLibCpuKernelSoName = "libcpu_kernels.so";
constexpr auto kInitDataSetQueue = "InitDataSetQueue";
constexpr auto kInitData = "InitData";
constexpr auto kGetNext = "GetNext";
@ -55,7 +59,7 @@ constexpr auto kUpdateCache = "UpdateCache";
constexpr auto kCacheSwapTable = "CacheSwapTable";
constexpr auto kSubAndFilter = "SubAndFilter";
constexpr auto kPadAndShift = "PadAndShift";
constexpr auto kCustRunApi = "RunCpuKernel";
constexpr auto kCpuRunApi = "RunCpuKernel";
constexpr auto kDropout2D = "Dropout2D";
constexpr auto kDropout3D = "Dropout3D";
constexpr auto kMaskedSelect = "MaskedSelect";
@ -65,7 +69,7 @@ constexpr auto kSearchSorted = "SearchSorted";
constexpr auto kResizeBilinear = "ResizeBilinear";
constexpr auto kResizeBilinearGrad = "ResizeBilinearGrad";
constexpr auto kScatterElements = "ScatterElements";
const std::set<std::string> kCustAiCpuKernelOps{kIdentity, kMaskedSelect, kMaskedSelectGrad, kDynamicStitch,
const std::set<std::string> kCpuKernelOps{kIdentity, kMaskedSelect, kMaskedSelectGrad, kDynamicStitch,
kSearchSorted, kResizeBilinear, kResizeBilinearGrad, kScatterElements};
const std::set<std::string> kCacheKernelOps{kUpdateCache, kCacheSwapTable, kSubAndFilter,
kPadAndShift, kDropout3D, kDropout2D};
@ -118,6 +122,24 @@ class AicpuOpUtil {
// kernel id
static uint64_t KernelId_;
};
// Holds the file name and raw binary content of one aicpu kernel so.
// Non-copyable: the binary blob may be large, so it is moved in once at
// construction and only exposed read-only afterwards.
class OpKernelBin {
 public:
  OpKernelBin(std::string name, std::vector<char> &&data) : name_(std::move(name)), data_(std::move(data)) {}
  ~OpKernelBin() = default;

  // File name of the kernel so (e.g. "libxxx.so").
  const std::string &GetName() const { return name_; }
  // Read-only view of the binary content.
  // Fix: use reinterpret_cast instead of a C-style cast (project/C++ core
  // guideline: named casts only).
  const uint8_t *GetBinData() const { return reinterpret_cast<const uint8_t *>(data_.data()); }
  // Size of the binary content in bytes.
  size_t GetBinDataSize() const { return data_.size(); }

  OpKernelBin(const OpKernelBin &) = delete;
  const OpKernelBin &operator=(const OpKernelBin &) = delete;

 private:
  std::string name_;
  std::vector<char> data_;
};
using OpKernelBinPtr = std::shared_ptr<OpKernelBin>;
} // namespace kernel
} // namespace mindspore

View File

@ -34,6 +34,7 @@
#include "runtime/device/ascend/tasksink/task_generator.h"
#include "backend/session/anf_runtime_algorithm.h"
#include "backend/session/kernel_build_client.h"
#include "backend/kernel_compiler/aicpu/aicpu_kernel_load.h"
#ifndef ENABLE_SECURITY
#include "runtime/device/ascend/profiling/profiling_manager.h"
#include "runtime/device/ascend/profiling/profiling_utils.h"
@ -286,6 +287,7 @@ void AscendKernelRuntime::ReleaseDeviceRes() {
if (mem_manager_ != nullptr) {
mem_manager_->FreeDeviceMemory();
}
mindspore::kernel::AicpuOpKernelLoad::GetInstance().FreeDeviceMemory();
auto rt_ret = rtRegTaskFailCallbackByModule(kModuleName, nullptr);
if (rt_ret != RT_ERROR_NONE) {
@ -438,6 +440,9 @@ bool AscendKernelRuntime::Load(const session::KernelGraph &graph, bool is_task_s
if (!LoadTask(graph)) {
return false;
}
if (!mindspore::kernel::AicpuOpKernelLoad::GetInstance().LaunchAicpuKernelSo()) {
return false;
}
return true;
}

View File

@ -73,13 +73,14 @@ void AicpuTask::Distribute() {
// for data dump
input_output_addr_ = reinterpret_cast<void *>(reinterpret_cast<uint8_t *>(args_) + io_addr_offset);
auto dump_flag = task_info_->dump_flag() ? RT_KERNEL_DUMPFLAG : RT_KERNEL_DEFAULT;
auto cpu_flag = task_info_->cust_aicpu() ? RT_KERNEL_CUSTOM_AICPU : dump_flag;
MS_LOG(INFO) << "Distribute AicpuTask start, args_size = " << args_size << ", io_addrs_num =" << io_addrs_num
<< ", so_name = " << task_info_->so_name() << ", kernel_name = " << task_info_->kernel_name()
<< ", dump_flag = " << dump_flag;
rt_ret = rtCpuKernelLaunchWithFlag(reinterpret_cast<const void *>(task_info_->so_name().data()),
reinterpret_cast<const void *>(task_info_->kernel_name().data()), 1, args_,
args_size, nullptr, stream_, dump_flag);
args_size, nullptr, stream_, cpu_flag);
if (rt_ret != RT_ERROR_NONE) {
MS_LOG(EXCEPTION) << "Call rt api rtCpuKernelLaunchWithFlag failed, ret: " << rt_ret;
}

View File

@ -119,14 +119,15 @@ class AicpuTaskInfo : public TaskInfo {
AicpuTaskInfo(const std::string &op_name, uint32_t stream_id, const std::string &so_name,
const std::string &kernel_name, const std::string &node_def, const std::string &ext_info,
const std::vector<void *> &input_data_addrs, const std::vector<void *> &output_data_addrs,
bool dump_flag)
bool dump_flag, bool cust_aicpu = false)
: TaskInfo(op_name, stream_id, TaskInfoType::AICPU, dump_flag),
so_name_(so_name),
kernel_name_(kernel_name),
node_def_(node_def),
ext_info_(ext_info),
input_data_addrs_(input_data_addrs),
output_data_addrs_(output_data_addrs) {}
output_data_addrs_(output_data_addrs),
cust_aicpu_(cust_aicpu) {}
~AicpuTaskInfo() override {}
const std::string &so_name() const { return so_name_; }
@ -135,6 +136,7 @@ class AicpuTaskInfo : public TaskInfo {
const std::vector<void *> &input_data_addrs() const { return input_data_addrs_; }
const std::vector<void *> &output_data_addrs() const { return output_data_addrs_; }
const std::string &ext_info() const { return ext_info_; }
const bool &cust_aicpu() const { return cust_aicpu_; }
private:
std::string so_name_;
@ -143,6 +145,7 @@ class AicpuTaskInfo : public TaskInfo {
std::string ext_info_;
std::vector<void *> input_data_addrs_;
std::vector<void *> output_data_addrs_;
bool cust_aicpu_;
};
class LabelSetTaskInfo : public TaskInfo {

View File

@ -493,6 +493,7 @@ constexpr auto kAttrDstType = "dst_type";
constexpr auto kAttrDump = "dump";
constexpr auto kAttrSkipNopOpAddr = "skip_nop_op_addr";
constexpr auto kAttrFuncType = "func_type";
constexpr auto kAttrCustAicpu = "cust_aicpu";
// custom operator func type
constexpr auto kCustomTypeAOT = "aot";

View File

@ -631,6 +631,38 @@ def prim_attr_register(fn):
return deco
def custom_aicpu_register(custom_aicpu_so="mindspore_aicpu_kernels"):
    """
    Register the custom aicpu so name on a primitive operator.

    The returned decorator wraps the primitive's ``__init__``: it re-runs the
    appropriate base-class initialization and records ``custom_aicpu_so`` in
    the ``cust_aicpu`` primitive attribute so the backend knows which dynamic
    library to load for this op.

    Args:
        custom_aicpu_so (str): Name of the dynamic library loaded by the aicpu ops.
            Default: "mindspore_aicpu_kernels".

    Raises:
        ValueError: If `custom_aicpu_so` is not a str (raised when the wrapped
            ``__init__`` is first called).
    """
    def deco(fn):
        from functools import wraps

        @wraps(fn)  # preserve the wrapped __init__'s name/docstring
        def wrapper(self, *args, **kwargs):
            if not isinstance(custom_aicpu_so, str):
                raise ValueError(f"custom_aicpu_so must be a str, but got {custom_aicpu_so}")
            class_name = self.__class__.__name__
            if hasattr(self.__class__, "substitute_name"):
                class_name = self.__class__.substitute_name
            if isinstance(self, PrimitiveWithInfer):
                PrimitiveWithInfer.__init__(self, class_name)
            elif isinstance(self, PrimitiveWithCheck):
                PrimitiveWithCheck.__init__(self, class_name)
            else:
                # Fix: honor substitute_name here too, consistent with the
                # branches above (previously used self.__class__.__name__).
                Primitive.__init__(self, class_name)
            attr_name = "cust_aicpu"
            self.add_prim_attr(attr_name, custom_aicpu_so)
            self.init_attrs[attr_name] = custom_aicpu_so
            ret = fn(self, *args, **kwargs)
            return ret
        return wrapper
    return deco
def constexpr(fn=None, get_instance=True, name=None):
"""
Creates a PrimitiveWithInfer operator that can infer the value at compile time. We can use it to define a function

View File

@ -14,6 +14,7 @@
* limitations under the License.
*/
#include "backend/kernel_compiler/kernel.h"
#include "backend/kernel_compiler/aicpu/aicpu_kernel_load.h"
namespace mindspore {
namespace kernel {
@ -22,5 +23,8 @@ namespace kernel {
* @brief build op and return a callable mod
*/
// CPU-only build stub: aicpu kernels are not available, so no kernel mod is built.
KernelModPtr AicpuOpBuild(const AnfNodePtr &anf_node) { return nullptr; }
// CPU-only build stub: nothing to launch; report success so graph load proceeds.
bool AicpuOpKernelLoad::LaunchAicpuKernelSo() { return true; }
// CPU-only build stub: no device memory was allocated; no-op.
void AicpuOpKernelLoad::FreeDeviceMemory() {}
} // namespace kernel
} // namespace mindspore