!28310 dynamic_kernel_mod

Merge pull request !28310 from TuDouNi/dynamic_shape_stage1
i-robot 2022-01-04 12:48:53 +00:00 committed by Gitee
commit e4438f3028
38 changed files with 1430 additions and 91 deletions

View File

@ -1,5 +1,6 @@
file(GLOB_RECURSE KERNEL_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR}
"kernel_build_info.cc"
"kernel.cc"
"kash/*.cc"
"common_utils.cc"
"oplib/*.cc"
@ -12,6 +13,7 @@ endif()
if(ENABLE_D)
file(GLOB_RECURSE D_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR}
"ascend_kernel_mod.cc"
"kernel_query.cc"
"tbe/*.cc"
"host/*.cc"

View File

@ -36,13 +36,12 @@ using HostDynamicKernel = mindspore::device::ascend::HostDynamicKernel;
namespace mindspore {
namespace kernel {
AicpuOpKernelMod::AicpuOpKernelMod() : anf_node_(nullptr) {}
AicpuOpKernelMod::AicpuOpKernelMod() {}
AicpuOpKernelMod::~AicpuOpKernelMod() {
args_.clear();
inputList_.clear();
outputList_.clear();
anf_node_ = nullptr;
input_list_.clear();
output_list_.clear();
input_size_list_.clear();
output_size_list_.clear();
workspace_size_list_.clear();
@ -55,9 +54,9 @@ void AicpuOpKernelMod::SetOutputSizeList(const std::vector<size_t> &size_list) {
const std::vector<size_t> &AicpuOpKernelMod::GetOutputSizeList() const { return output_size_list_; }
void AicpuOpKernelMod::SetWorkspaceSizeList(const std::vector<size_t> &size_list) { workspace_size_list_ = size_list; }
const std::vector<size_t> &AicpuOpKernelMod::GetWorkspaceSizeList() const { return workspace_size_list_; }
void AicpuOpKernelMod::SetInputList(const std::vector<int64_t> &inputList) { inputList_ = inputList; }
void AicpuOpKernelMod::SetOutputList(const std::vector<int64_t> &outputList) { outputList_ = outputList; }
void AicpuOpKernelMod::SetNodeDef(const std::string &nodeDef) { (void)node_def_str_.assign(nodeDef); }
void AicpuOpKernelMod::SetInputList(const std::vector<int64_t> &input_list) { input_list_ = input_list; }
void AicpuOpKernelMod::SetOutputList(const std::vector<int64_t> &output_list) { output_list_ = output_list; }
void AicpuOpKernelMod::SetNodeDef(const std::string &node_def) { (void)node_def_str_.assign(node_def); }
void AicpuOpKernelMod::SetExtInfo(const std::string &ext_info) { ext_info_ = ext_info; }
void AicpuOpKernelMod::SetNodeName(const std::string &node_name) { node_name_ = node_name; }
void AicpuOpKernelMod::SetCustSo(const std::string &cust_so) {
@ -85,11 +84,18 @@ void AicpuOpKernelMod::CreateCpuKernelInfo(const std::vector<AddressPtr> &inputs
node_so_ = kLibAicpuKernelSoName;
}
}
} else {
if (kCpuKernelBaseOps.find(node_name_) == kCpuKernelBaseOps.end()) {
} else if (kCpuKernelBaseOps.find(node_name_) == kCpuKernelBaseOps.end()) {
node_name_ = kCpuRunApi;
}
if (node_name_ == kTopK) {
node_name_ = kTopKV2;
}
if (node_name_ == kStack) {
node_name_ = kPack;
}
// InputOutputAddr
vector<void *> io_addrs;
(void)std::transform(std::begin(inputs), std::end(inputs), std::back_inserter(io_addrs),
@ -120,6 +126,8 @@ void AicpuOpKernelMod::CreateCpuKernelInfo(const std::vector<AddressPtr> &inputs
aicpu_param_head.extInfoAddr = 0;
} else {
MS_LOG(INFO) << "Dynamic Kernel Ext Info size:" << ext_info_.size();
aicpu_param_head.extInfoLength = SizeToUint(ext_info_.size());
aicpu_param_head.extInfoAddr = reinterpret_cast<uint64_t>(ext_info_addr_dev_);
}
args_.clear();
@ -162,6 +170,8 @@ bool AicpuOpKernelMod::Launch(const std::vector<AddressPtr> &inputs, const std::
}
MS_LOG(INFO) << "Aicpu launch, node_so_:" << node_so_ << ", node name:" << node_name_
<< ", args_size:" << args_.length();
// cppcheck-suppress unreadVariable
auto lock = AscendKernelMod::LockRuntime();
if (rtCpuKernelLaunchWithFlag(reinterpret_cast<const void *>(node_so_.c_str()),
reinterpret_cast<const void *>(node_name_.c_str()), 1,
reinterpret_cast<const void *>(args_.data()), static_cast<uint32_t>(args_.length()),

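Note: in CreateCpuKernelInfo above, the AICPU parameter head is filled with the extension-info length and a device-side address when dynamic-shape ext info is present, and zeroed otherwise. A minimal sketch of that pattern, assuming a hypothetical ParamHead layout (the real struct comes from the AICPU runtime headers):

#include <cstdint>
#include <string>

// Hypothetical stand-in for the AICPU parameter head used in the diff.
struct ParamHead {
  uint32_t extInfoLength;
  uint64_t extInfoAddr;
};

void FillExtInfo(ParamHead *head, const std::string &ext_info, void *ext_info_addr_dev) {
  if (ext_info.empty()) {
    head->extInfoLength = 0;
    head->extInfoAddr = 0;  // static-shape launch: no ext info attached
  } else {
    head->extInfoLength = static_cast<uint32_t>(ext_info.size());
    head->extInfoAddr = reinterpret_cast<uint64_t>(ext_info_addr_dev);
  }
}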
View File

@ -25,6 +25,8 @@ namespace kernel {
class AicpuOpKernelMod : public AscendKernelMod {
public:
AicpuOpKernelMod();
explicit AicpuOpKernelMod(const AnfNodePtr &anf_node_ptr) : AscendKernelMod(anf_node_ptr) {}
~AicpuOpKernelMod() override;
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs, void *stream_ptr) override;
@ -33,10 +35,10 @@ class AicpuOpKernelMod : public AscendKernelMod {
const std::vector<AddressPtr> &outputs, uint32_t stream_id) override;
device::DynamicKernelPtr GenDynamicKernel(const CNodePtr &cnode_ptr, void *stream_ptr) override;
void SetInputList(const std::vector<int64_t> &inputList);
void SetOutputList(const std::vector<int64_t> &outputList);
void SetInputList(const std::vector<int64_t> &input_list);
void SetOutputList(const std::vector<int64_t> &output_list);
void SetAnfNode(const AnfNodePtr &anf_node);
void SetNodeDef(const std::string &nodeDef);
void SetNodeDef(const std::string &node_def);
void SetExtInfo(const std::string &ext_info);
void SetNodeName(const std::string &node_name);
void SetCustSo(const std::string &cust_so);
@ -56,16 +58,18 @@ class AicpuOpKernelMod : public AscendKernelMod {
const std::vector<size_t> &GetOutputSizeList() const override;
const std::vector<size_t> &GetWorkspaceSizeList() const override;
private:
bool cust_kernel_{false};
protected:
std::string args_;
std::string node_def_str_;
std::string ext_info_;
std::string node_name_;
std::string node_so_;
std::string ext_info_;
std::vector<int64_t> inputList_;
std::vector<int64_t> outputList_;
AnfNodePtr anf_node_;
bool cust_kernel_{false};
std::string node_def_str_;
void *ext_info_addr_dev_ = nullptr;
private:
std::vector<int64_t> input_list_;
std::vector<int64_t> output_list_;
std::vector<size_t> input_size_list_;
std::vector<size_t> output_size_list_;

View File

@ -0,0 +1,231 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "backend/kernel_compiler/aicpu/dynamic_aicpu_kernel_mod.h"
#include <memory>
#include <vector>
#include <string>
#include <algorithm>
#include "runtime/mem.h"
#include "acl/acl_rt.h"
#include "utils/convert_utils.h"
#include "backend/kernel_compiler/aicpu/aicpu_util.h"
#include "utils/ms_context.h"
#include "runtime/device/kernel_runtime.h"
#include "runtime/kernel.h"
#include "utils/utils.h"
#include "backend/session/anf_runtime_algorithm.h"
namespace mindspore {
namespace kernel {
DynamicAicpuOpKernelMod::DynamicAicpuOpKernelMod(const AnfNodePtr &anf_node_ptr) : AicpuOpKernelMod(anf_node_ptr) {
unknow_type_ = device::ascend::UnknowShapeOpType::DEPEND_IN_SHAPE;
auto cnode = anf_node_ptr->cast<CNodePtr>();
if (cnode != nullptr) {
auto op_name = AnfAlgo::GetCNodeName(cnode);
if (kComputeDepend.find(op_name) != kComputeDepend.end()) {
unknow_type_ = device::ascend::UnknowShapeOpType::DEPEND_COMPUTE;
}
}
}
DynamicAicpuOpKernelMod::~DynamicAicpuOpKernelMod() {
// free dev ptr
if (ext_info_addr_dev_ == nullptr) {
return;
}
auto ret = rtFree(ext_info_addr_dev_);
if (ret != RT_ERROR_NONE) {
MS_LOG(ERROR) << "rtFree failed";
}
}
void DynamicAicpuOpKernelMod::InferOp() {
auto node = anf_node_.lock();
MS_EXCEPTION_IF_NULL(node);
if (!AnfAlgo::IsDynamicShape(node)) {
MS_LOG(EXCEPTION) << "The node is not dynamic shape.";
}
KernelMod::InferShape();
}
void DynamicAicpuOpKernelMod::InitOp() {
auto node = anf_node_.lock();
MS_EXCEPTION_IF_NULL(node);
auto cnode = node->cast<CNodePtr>();
MS_EXCEPTION_IF_NULL(cnode);
if (!AnfAlgo::IsDynamicShape(cnode)) {
MS_LOG(EXCEPTION) << "The node is not dynamic shape: " << cnode->fullname_with_scope();
}
MS_LOG(INFO) << "UpdateExtInfo of " << cnode->fullname_with_scope() << " start";
auto input_num = AnfAlgo::GetInputTensorNum(cnode);
auto output_num = AnfAlgo::GetOutputTensorNum(cnode);
if (input_num == 0 && output_num == 0) {
MS_LOG(INFO) << "Node:" << cnode->fullname_with_scope() << " no need to update output shape";
return;
}
// Parse aicpu ext info
ext_info_handler_ = std::make_shared<device::ascend::AicpuExtInfoHandler>(
cnode->fullname_with_scope(), static_cast<uint32_t>(input_num), static_cast<uint32_t>(output_num), unknow_type_);
MS_EXCEPTION_IF_NULL(ext_info_handler_);
if (!ext_info_handler_->Parse(ext_info_)) {
MS_LOG(EXCEPTION) << "Parse AiCpu ext_info_handler failed";
}
if (ext_info_.empty()) {
MS_LOG(INFO) << "No need to copy to device, ext_info_ is empty. ";
return;
}
for (size_t i = 0; i < input_num; ++i) {
if (!ext_info_handler_->UpdateInputShapeAndType(i, NOT_NULL(cnode))) {
MS_LOG(EXCEPTION) << "Update input shape failed, cnode:" << cnode->fullname_with_scope() << " input:" << i;
}
}
if (unknow_type_ != device::ascend::UnknowShapeOpType::DEPEND_COMPUTE) {
for (size_t i = 0; i < output_num; ++i) {
if (!ext_info_handler_->UpdateOutputShapeAndType(i, NOT_NULL(cnode))) {
MS_LOG(EXCEPTION) << "Update output shape failed, cnode:" << cnode->fullname_with_scope() << " output:" << i;
}
}
}
}
void DynamicAicpuOpKernelMod::AllocateExtInfoDeviceAddr(const CNodePtr &cnode) {
MS_EXCEPTION_IF_NULL(cnode);
if (ext_info_addr_dev_ != nullptr) {
return;
}
// Allocate ext info addr in device
if (ext_info_.size() != 0) {
auto ret = rtMalloc(&ext_info_addr_dev_, ext_info_.size(), RT_MEMORY_HBM);
if (ret != RT_ERROR_NONE) {
MS_LOG(EXCEPTION) << "Call rtMalloc ext_info_addr_dev_ failed. Op name: " << cnode->fullname_with_scope();
}
}
ext_info_size_ = ext_info_.size();
}
bool DynamicAicpuOpKernelMod::Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &,
const std::vector<AddressPtr> &outputs, void *stream_ptr) {
if (stream_ptr == nullptr) {
MS_LOG(ERROR) << "stream_ptr should not be nullptr.";
return false;
}
if (stream_ == nullptr) {
stream_ = stream_ptr;
}
auto node = anf_node_.lock();
MS_EXCEPTION_IF_NULL(node);
auto cnode = node->cast<CNodePtr>();
MS_EXCEPTION_IF_NULL(cnode);
MS_LOG(INFO) << "Start launch of node: " << cnode->fullname_with_scope();
// is dynamic shape
if (!AnfAlgo::IsDynamicShape(cnode)) {
MS_LOG(EXCEPTION) << "The cnode is not dynamic shape:" << cnode->fullname_with_scope();
}
// copy extinfo to device
AllocateExtInfoDeviceAddr(cnode);
MS_EXCEPTION_IF_NULL(ext_info_handler_);
auto ret = aclrtMemcpy(ext_info_addr_dev_, ext_info_size_, ext_info_handler_->GetExtInfo(),
ext_info_handler_->GetExtInfoLen(), ACL_MEMCPY_HOST_TO_DEVICE);
if (ret != RT_ERROR_NONE) {
MS_LOG(ERROR) << "UpdateExtInfo aclrtMemcpy failed. Node info: " << cnode->fullname_with_scope();
return false;
}
AicpuOpKernelMod::CreateCpuKernelInfo(inputs, outputs);
MS_LOG(INFO) << "Aicpu launch, node_so_:" << node_so_ << ", node name:" << node_name_
<< ", args_size:" << args_.length();
// cppcheck-suppress unreadVariable
auto lock = AscendKernelMod::LockRuntime();
ret = rtCpuKernelLaunchWithFlag(reinterpret_cast<const void *>(node_so_.c_str()),
reinterpret_cast<const void *>(node_name_.c_str()), 1,
reinterpret_cast<const void *>(args_.data()), static_cast<uint32_t>(args_.length()),
nullptr, stream_, RT_KERNEL_DEFAULT);
if (ret != RT_ERROR_NONE) {
MS_LOG(ERROR) << "Aicpu op launch failed!";
return false;
}
if (unknow_type_ == device::ascend::UnknowShapeOpType::DEPEND_COMPUTE) {
ret = aclrtMemcpyAsync(ext_info_handler_->GetExtInfo(), ext_info_handler_->GetExtInfoLen(), ext_info_addr_dev_,
ext_info_size_, ACL_MEMCPY_DEVICE_TO_HOST, stream_);
if (ret != RT_ERROR_NONE) {
MS_LOG(ERROR) << "aclrtMemcpyAsync output shape failed. Op name: " << cnode->fullname_with_scope();
return false;
}
}
return true;
}
void DynamicAicpuOpKernelMod::UpdateOp() {
auto node = anf_node_.lock();
MS_EXCEPTION_IF_NULL(node);
auto cnode = node->cast<CNodePtr>();
MS_EXCEPTION_IF_NULL(cnode);
MS_LOG(INFO) << "Aicpu " << cnode->fullname_with_scope() << " PostExecute";
// is dynamic shape
if (!AnfAlgo::IsDynamicShape(cnode)) {
MS_LOG(EXCEPTION) << "The cnode is not dynamic shape:" << cnode->fullname_with_scope();
}
if (unknow_type_ != device::ascend::UnknowShapeOpType::DEPEND_COMPUTE) {
MS_LOG(INFO) << "Node " << node->fullname_with_scope() << " skips UpdateOp.";
return;
}
// cppcheck-suppress unreadVariable
auto lock = AscendKernelMod::LockRuntime();
auto ret = rtStreamSynchronize(stream_);
if (ret != RT_ERROR_NONE) {
MS_LOG(EXCEPTION) << "Call runtime rtStreamSynchronize failed. Op name: " << cnode->fullname_with_scope();
}
MS_LOG(INFO) << "Update aicpu kernel output shape from ext_info. Op name: " << cnode->fullname_with_scope();
UpdateOutputShapeFromExtInfo(cnode);
}
bool DynamicAicpuOpKernelMod::UpdateOutputShapeFromExtInfo(const CNodePtr &cnode) {
MS_EXCEPTION_IF_NULL(cnode);
MS_LOG(INFO) << "UpdateOutputShapeFromExtInfo start. Op name " << cnode->fullname_with_scope();
MS_EXCEPTION_IF_NULL(ext_info_handler_);
std::vector<TypeId> type_ids;
std::vector<std::vector<size_t>> shapes;
auto output_num = AnfAlgo::GetOutputTensorNum(cnode);
for (size_t i = 0; i < output_num; ++i) {
MS_LOG(INFO) << "Get output:" << i << " Shape";
std::vector<int64_t> shape;
TypeId type_id;
(void)ext_info_handler_->GetOutputShapeAndType(SizeToUint(i), NOT_NULL(&shape), NOT_NULL(&type_id));
type_ids.emplace_back(type_id);
std::vector<size_t> size_t_shape;
std::transform(shape.begin(), shape.end(), std::back_inserter(size_t_shape), LongToSize);
shapes.emplace_back(size_t_shape);
}
AnfAlgo::SetOutputInferTypeAndShape(type_ids, shapes, cnode.get());
return true;
}
} // namespace kernel
} // namespace mindspore
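Note: the new DynamicAicpuOpKernelMod splits a dynamic-shape launch into InferOp (host-side shape inference), InitOp (parse and refresh ext info), Launch (copy ext info to device and run), and UpdateOp (sync the stream and read output shapes back for DEPEND_COMPUTE ops). A hedged sketch of the driver loop, with KernelModLike as a hypothetical minimal interface mirroring those hooks:

#include <memory>

// Hypothetical interface; names follow the lifecycle hooks added in this
// commit, the driver itself is illustrative only.
struct KernelModLike {
  virtual ~KernelModLike() = default;
  virtual void InferOp() = 0;   // re-infer shapes on the host
  virtual void InitOp() = 0;    // rebuild ext info / tiling for the new shapes
  virtual bool Launch(void *stream) = 0;
  virtual void UpdateOp() = 0;  // sync + fetch output shapes if value-dependent
};

bool RunDynamicKernel(KernelModLike *kernel, void *stream) {
  kernel->InferOp();
  kernel->InitOp();
  if (!kernel->Launch(stream)) {
    return false;
  }
  kernel->UpdateOp();
  return true;
}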

View File

@ -0,0 +1,54 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_AICPU_DYNAMIC_AICPU_KERNEL_MOD_H_
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_AICPU_DYNAMIC_AICPU_KERNEL_MOD_H_
#include <vector>
#include <memory>
#include <string>
#include "backend/kernel_compiler/aicpu/aicpu_kernel_mod.h"
#include "backend/kernel_compiler/aicpu/aicpu_util.h"
#include "runtime/device/ascend/executor/aicpu_ext_info_handle.h"
namespace mindspore {
namespace kernel {
class DynamicAicpuOpKernelMod : public AicpuOpKernelMod {
public:
DynamicAicpuOpKernelMod() : unknow_type_(device::ascend::UnknowShapeOpType::DEPEND_IN_SHAPE) {}
explicit DynamicAicpuOpKernelMod(const AnfNodePtr &anf_node_ptr);
~DynamicAicpuOpKernelMod() override;
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs, void *stream_ptr) override;
void InferOp() override;
void InitOp() override;
void UpdateOp() override;
private:
void AllocateExtInfoDeviceAddr(const CNodePtr &cnode);
bool UpdateOutputShapeFromExtInfo(const CNodePtr &cnode);
std::shared_ptr<device::ascend::AicpuExtInfoHandler> ext_info_handler_ = nullptr;
size_t ext_info_size_ = 0;
device::ascend::UnknowShapeOpType unknow_type_;
};
using DynamicAicpuOpKernelModPtr = std::shared_ptr<DynamicAicpuOpKernelMod>;
using DynamicAicpuOpKernelModPtrList = std::vector<DynamicAicpuOpKernelModPtr>;
} // namespace kernel
} // namespace mindspore
#endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_AICPU_DYNAMIC_AICPU_KERNEL_MOD_H_

View File

@ -0,0 +1,35 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "backend/kernel_compiler/ascend_kernel_mod.h"
#include "runtime/rt.h"
namespace mindspore {
namespace kernel {
void AscendKernelMod::UpdateOp() {
MS_EXCEPTION_IF_NULL(stream_);
// cppcheck-suppress unreadVariable
auto lock = LockRuntime();
if (RT_ERROR_NONE != rtStreamSynchronize(stream_)) {
MS_LOG(EXCEPTION) << "Call runtime rtStreamSynchronize failed.";
}
}
std::lock_guard<std::mutex> AscendKernelMod::LockRuntime() {
static std::mutex mutex;
return std::lock_guard<std::mutex>(mutex);
}
} // namespace kernel
} // namespace mindspore
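Note: AscendKernelMod::LockRuntime returns a std::lock_guard by value. This compiles because C++17 guarantees copy elision for a prvalue return, so the guard is constructed directly in the caller's frame and the mutex stays held until the caller's `lock` leaves scope; that is also why each call site carries a cppcheck-suppress for the seemingly unread variable. A standalone sketch of the same idiom:

#include <mutex>

std::lock_guard<std::mutex> LockResource() {
  static std::mutex mu;
  // C++17 guaranteed copy elision: the guard is constructed in the caller
  // even though lock_guard is neither copyable nor movable.
  return std::lock_guard<std::mutex>(mu);
}

void CriticalSection() {
  // cppcheck-suppress unreadVariable
  auto lock = LockResource();
  // ... serialized work; the mutex is released when `lock` goes out of scope
}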

View File

@ -31,6 +31,8 @@ namespace mindspore {
namespace kernel {
class AscendKernelMod : public KernelMod {
public:
AscendKernelMod() {}
explicit AscendKernelMod(const AnfNodePtr &anf_node_ptr) : KernelMod(anf_node_ptr) {}
virtual std::vector<TaskInfoPtr> GenTask(const std::vector<AddressPtr> &, const std::vector<AddressPtr> &,
const std::vector<AddressPtr> &, uint32_t) = 0;
uint32_t block_dim() { return block_dim_; }
@ -44,6 +46,7 @@ class AscendKernelMod : public KernelMod {
return false;
#endif
}
void UpdateOp() override;
void InitDynamicKernel(const CNodePtr &cnode_ptr, void *stream) {
if (dynamic_kernel_ == nullptr) {
@ -54,6 +57,8 @@ class AscendKernelMod : public KernelMod {
}
device::DynamicKernelPtr DynamicKernel() const { return dynamic_kernel_; }
static std::lock_guard<std::mutex> LockRuntime();
protected:
uint32_t block_dim_{1};
uint32_t stream_id_{0};

View File

@ -66,7 +66,13 @@ HcclKernelFactory &HcclKernelFactory::Get() {
HcclKernel::HcclKernel()
: hccl_count_(0), op_type_(::HcclReduceOp::HCCL_REDUCE_SUM), root_id_(0), src_rank_(0), dest_rank_(0) {}
HcclKernel::HcclKernel(const AnfNodePtr &anf_node)
: AscendKernelMod(),
hccl_count_(0),
op_type_(::HcclReduceOp::HCCL_REDUCE_SUM),
root_id_(0),
src_rank_(0),
dest_rank_(0) {}
HcclKernel::~HcclKernel() {
hccl_kernel_input_shape_list_.clear();
hccl_kernel_output_shape_list_.clear();
@ -294,5 +300,99 @@ device::DynamicKernelPtr HcclKernel::GenDynamicKernel(const CNodePtr &cnode_ptr,
hccl_type, input_data_addr, output_data_addr, hccl_count_, data_type, op_type_, root_id_, stream_ptr, cnode_ptr);
return executor;
}
void HcclKernel::InferOp() {
if (AnfAlgo::IsDynamicShape(anf_node_.lock())) {
KernelMod::InferShape();
}
}
bool HcclKernel::Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &,
const std::vector<AddressPtr> &outputs, void *stream_ptr) {
auto node = anf_node_.lock();
MS_EXCEPTION_IF_NULL(node);
if (!node->isa<CNode>()) {
MS_LOG(EXCEPTION) << "anfnode is not a cnode";
}
auto cnode = node->cast<CNodePtr>();
MS_EXCEPTION_IF_NULL(cnode);
if (inputs.empty() && outputs.empty()) {
MS_LOG(ERROR) << "Hccl kernel input or output is empty";
return false;
}
if (hccl_data_type_list_.empty()) {
MS_LOG(ERROR) << "Hccl data type list is empty";
return false;
}
MS_EXCEPTION_IF_NULL(stream_ptr);
MS_LOG(INFO) << "Start Execute: " << cnode->DebugString();
std::string hccl_type = MsOpNameToHcomOpType(AnfAlgo::GetCNodeName(anf_node_.lock()));
HcclDataType data_type = hccl_data_type_list_[0];
::HcomOperation op_info;
op_info.hcclType = hccl_type;
op_info.inputPtr = inputs[0]->addr;
op_info.outputPtr = outputs[0]->addr;
op_info.dataType = static_cast<HcclDataType>(data_type);
op_info.opType = static_cast<HcclReduceOp>(op_type_);
op_info.root = IntToUint(root_id_);
op_info.count = hccl_count_;
auto callback = [this](HcclResult status) {
if (status != HCCL_SUCCESS) {
MS_LOG(ERROR) << "HcclExecEnqueueOp callback failed, ret:" << status;
}
std::lock_guard<std::mutex> lock(this->hccl_mutex_);
this->cond_.notify_all();
MS_LOG(INFO) << "hccl callback success.";
};
auto hccl_ret = hccl::HcclAdapter::GetInstance().HcclExecEnqueueOp(op_info, callback);
if (hccl_ret != HCCL_SUCCESS) {
MS_LOG(EXCEPTION) << "Call EnqueueHcomOperation failed, node info: " << cnode->DebugString();
return false;
}
std::unique_lock<std::mutex> ulock(hccl_mutex_);
cond_.wait(ulock);
MS_LOG(INFO) << "Execute " << cnode->DebugString() << " success";
return true;
}
void HcclKernel::InitOp() {
auto node = anf_node_.lock();
MS_EXCEPTION_IF_NULL(node);
if (!node->isa<CNode>()) {
MS_LOG(EXCEPTION) << "anfnode is not a cnode";
}
auto cnode = node->cast<CNodePtr>();
MS_EXCEPTION_IF_NULL(cnode);
if (!AnfAlgo::IsDynamicShape(cnode)) {
MS_LOG(DEBUG) << "The node is not dynamic shape: " << cnode->fullname_with_scope();
return;
}
MS_LOG(INFO) << "Start to InitOp. Node info: " << cnode->DebugString();
std::vector<std::vector<size_t>> hccl_kernel_input_shape_list;
if (!HcomUtil::GetKernelInputShape(cnode, &hccl_kernel_input_shape_list)) {
MS_LOG(EXCEPTION) << "GetKernelInputShape fail! Node info: " << cnode->DebugString();
}
std::vector<HcclDataType> hccl_data_type_list;
if (!HcomUtil::GetHcomDataType(cnode, &hccl_data_type_list)) {
MS_LOG(EXCEPTION) << "GetHcomDataType fail! Node info: " << cnode->DebugString();
}
// Update Hccl count
if (!HcomUtil::GetHcomCount(cnode, hccl_data_type_list, hccl_kernel_input_shape_list, &hccl_count_)) {
MS_LOG(EXCEPTION) << "GetHcomCount fail! Node info: " << cnode->DebugString();
}
MS_LOG(INFO) << "Update Hccl count:" << hccl_count_;
}
} // namespace kernel
} // namespace mindspore
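Note: HcclKernel::Launch enqueues the collective and then blocks on a condition variable until the HCCL completion callback fires. A self-contained sketch of that rendezvous, with a `done` predicate added on top of what the diff shows (the diff waits unconditionally; the predicate guards against the callback completing before the wait begins):

#include <condition_variable>
#include <functional>
#include <mutex>

void EnqueueAndWait(const std::function<void(std::function<void(int)>)> &enqueue) {
  std::mutex mu;
  std::condition_variable cv;
  bool done = false;
  // The callback may run on another thread once the collective finishes.
  enqueue([&](int status) {
    (void)status;  // a real callback would check the HCCL result here
    std::lock_guard<std::mutex> lock(mu);
    done = true;
    cv.notify_all();
  });
  std::unique_lock<std::mutex> ulock(mu);
  cv.wait(ulock, [&] { return done; });  // predicate avoids lost wakeups
}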

View File

@ -34,6 +34,7 @@ namespace kernel {
class HcclKernel : public AscendKernelMod {
public:
HcclKernel();
explicit HcclKernel(const AnfNodePtr &anf_node);
~HcclKernel() override;
virtual bool Init(const AnfNodePtr &anf_node);
const std::vector<size_t> &GetInputSizeList() const override;
@ -43,6 +44,12 @@ class HcclKernel : public AscendKernelMod {
const std::vector<AddressPtr> &outputs, uint32_t stream_id) override;
device::DynamicKernelPtr GenDynamicKernel(const CNodePtr &cnode_ptr, void *stream_ptr) override;
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs, void *stream_ptr) override;
void InferOp() override;
void InitOp() override;
protected:
std::vector<std::vector<size_t>> hccl_kernel_input_shape_list_;
std::vector<std::vector<size_t>> hccl_kernel_output_shape_list_;
@ -56,9 +63,10 @@ class HcclKernel : public AscendKernelMod {
mutable std::vector<size_t> input_size_list_;
mutable std::vector<size_t> output_size_list_;
mutable std::vector<size_t> workspace_size_list_;
AnfNodeWeakPtr anf_node_;
std::string op_name_;
std::string group_;
std::mutex hccl_mutex_;
std::condition_variable cond_;
};
using HcclKernelCreater = std::function<std::shared_ptr<HcclKernel>()>;

View File

@ -16,6 +16,7 @@
#include "backend/kernel_compiler/host/dynamic_broadcast_gradient_args_kernel.h"
#include "backend/session/anf_runtime_algorithm.h"
#include "runtime/device/ascend/ascend_kernel_runtime.h"
#include "utils/trace_base.h"
namespace mindspore {
@ -195,6 +196,15 @@ void DynamicBroadcastGradientArgsKernel::Execute() {
input_shapes[1] = GetInputShape(cnode, 1);
auto grad_reduce_idx = CalculateOutput(input_shapes);
auto runtime_instance = device::KernelRuntimeManager::Instance().GetCurrentKernelRuntime();
MS_EXCEPTION_IF_NULL(runtime_instance);
// cppcheck-suppress unreadVariable
auto lock = AscendKernelMod::LockRuntime();
auto ret = runtime_instance->SyncStream();
if (!ret) {
MS_LOG(EXCEPTION) << "Sync stream error!";
}
auto r0_size = SetOutputValue(cnode, grad_reduce_idx, 0, input_shapes[0].size());
auto r1_size = SetOutputValue(cnode, grad_reduce_idx, 1, input_shapes[1].size());
@ -209,5 +219,26 @@ device::DynamicKernelPtr DynamicBroadcastGradientArgsKernelMod::GenDynamicKernel
void *stream_ptr) {
return std::make_shared<DynamicBroadcastGradientArgsKernel>(stream_ptr, cnode_ptr);
}
bool DynamicBroadcastGradientArgsKernelMod::Launch(const std::vector<AddressPtr> &, const std::vector<AddressPtr> &,
const std::vector<AddressPtr> &, void *stream_ptr) {
auto node = anf_node_.lock();
MS_EXCEPTION_IF_NULL(node);
if (!node->isa<CNode>()) {
MS_LOG(EXCEPTION) << "anfnode is not a cnode";
}
auto cnode = node->cast<CNodePtr>();
MS_EXCEPTION_IF_NULL(cnode);
stream_ = stream_ptr;
auto broadcast_grad_kernel = std::make_shared<DynamicBroadcastGradientArgsKernel>(stream_ptr, cnode);
try {
broadcast_grad_kernel->Execute();
} catch (const std::exception &e) {
MS_LOG(ERROR) << "DynamicBroadcastGradientArgsKernel Launch failed. node: " << cnode->fullname_with_scope()
<< ", Error message is " << e.what();
return false;
}
return true;
}
} // namespace kernel
} // namespace mindspore
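Note: the host-kernel Launch overrides added in this commit all follow the same shape: build the host kernel for the cnode, run Execute() inside try/catch, and convert an exception into a boolean launch failure. A generic sketch, with HostKernelLike as a hypothetical stand-in for DynamicBroadcastGradientArgsKernel, DynamicReshapeKernel, and friends:

#include <exception>
#include <iostream>
#include <memory>

// Hypothetical host-side kernel; the real ones take a stream and a cnode.
struct HostKernelLike {
  void Execute() { /* compute outputs on the host; may throw */ }
};

bool LaunchHostKernel(void *stream_ptr) {
  (void)stream_ptr;  // unused in this host-only sketch
  auto kernel = std::make_shared<HostKernelLike>();
  try {
    kernel->Execute();
  } catch (const std::exception &e) {
    std::cerr << "Host kernel launch failed: " << e.what() << '\n';
    return false;  // exceptions become a boolean launch failure
  }
  return true;
}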

View File

@ -36,6 +36,8 @@ class DynamicBroadcastGradientArgsKernelMod : public HostKernelMod {
DynamicBroadcastGradientArgsKernelMod() = default;
~DynamicBroadcastGradientArgsKernelMod() override = default;
device::DynamicKernelPtr GenDynamicKernel(const CNodePtr &cnode_ptr, void *stream_ptr) override;
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs, void *stream_ptr) override;
};
MS_HOST_REG_KERNEL(DynamicBroadcastGradientArgs, DynamicBroadcastGradientArgsKernelMod);
} // namespace kernel

View File

@ -114,5 +114,26 @@ void DynamicReshapeKernel::Execute() {
device::DynamicKernelPtr DynamicReshapeKernelMod::GenDynamicKernel(const CNodePtr &cnode_ptr, void *stream_ptr) {
return std::make_shared<DynamicReshapeKernel>(stream_ptr, cnode_ptr);
}
bool DynamicReshapeKernelMod::Launch(const std::vector<AddressPtr> &, const std::vector<AddressPtr> &,
const std::vector<AddressPtr> &, void *stream_ptr) {
auto node = anf_node_.lock();
MS_EXCEPTION_IF_NULL(node);
if (!node->isa<CNode>()) {
MS_LOG(EXCEPTION) << "anfnode is not a cnode";
}
auto cnode = node->cast<CNodePtr>();
MS_EXCEPTION_IF_NULL(cnode);
stream_ = stream_ptr;
auto reshape_kernel = std::make_shared<DynamicReshapeKernel>(stream_ptr, cnode);
try {
reshape_kernel->Execute();
} catch (const std::exception &e) {
MS_LOG(ERROR) << "DynamicReshapeKernel Launch failed. node: " << cnode->fullname_with_scope()
<< ", Error message is " << e.what();
return false;
}
return true;
}
} // namespace kernel
} // namespace mindspore

View File

@ -35,6 +35,9 @@ class DynamicReshapeKernelMod : public HostKernelMod {
DynamicReshapeKernelMod() = default;
~DynamicReshapeKernelMod() override = default;
device::DynamicKernelPtr GenDynamicKernel(const CNodePtr &cnode_ptr, void *stream_ptr) override;
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs, void *stream_ptr) override;
void UpdateOp() override { AscendKernelMod::UpdateOp(); }
};
MS_HOST_REG_KERNEL(DynamicReshape, DynamicReshapeKernelMod);
} // namespace kernel

View File

@ -57,6 +57,8 @@ void DynamicShapeKernel::Execute() {
} else {
auto runtime_instance = device::KernelRuntimeManager::Instance().GetCurrentKernelRuntime();
MS_EXCEPTION_IF_NULL(runtime_instance);
// cppcheck-suppress unreadVariable
auto lock = AscendKernelMod::LockRuntime();
auto ret = runtime_instance->SyncStream();
if (!ret) {
MS_LOG(EXCEPTION) << "Sync stream error!";
@ -106,5 +108,23 @@ void DynamicShapeKernel::Execute(const std::vector<AddressPtr> &inputs, const st
device::DynamicKernelPtr DynamicShapeKernelMod::GenDynamicKernel(const CNodePtr &cnode_ptr, void *stream_ptr) {
return std::make_shared<DynamicShapeKernel>(stream_ptr, cnode_ptr);
}
bool DynamicShapeKernelMod::Launch(const std::vector<AddressPtr> &, const std::vector<AddressPtr> &,
const std::vector<AddressPtr> &, void *stream_ptr) {
auto node = anf_node_.lock();
MS_EXCEPTION_IF_NULL(node);
auto cnode = node->cast<CNodePtr>();
MS_EXCEPTION_IF_NULL(cnode);
stream_ = stream_ptr;
auto shape_kernel = std::make_shared<DynamicShapeKernel>(stream_ptr, cnode);
try {
shape_kernel->Execute();
} catch (const std::exception &e) {
MS_LOG(ERROR) << "DynamicShapeKernelMod Launch failed. node: " << cnode->fullname_with_scope()
<< ", Error message is " << e.what();
return false;
}
return true;
}
} // namespace kernel
} // namespace mindspore

View File

@ -38,18 +38,7 @@ class DynamicShapeKernelMod : public HostKernelMod {
~DynamicShapeKernelMod() override = default;
device::DynamicKernelPtr GenDynamicKernel(const CNodePtr &cnode_ptr, void *stream_ptr) override;
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs, void *stream_ptr) override {
if (kernel_ == nullptr) {
kernel_ =
std::dynamic_pointer_cast<DynamicShapeKernel>(GenDynamicKernel(anf_node_->cast<CNodePtr>(), stream_ptr));
kernel_->Initialize();
}
kernel_->Execute(inputs, outputs);
return true;
}
private:
std::shared_ptr<DynamicShapeKernel> kernel_;
const std::vector<AddressPtr> &outputs, void *stream_ptr) override;
};
MS_HOST_REG_KERNEL(DynamicShape, DynamicShapeKernelMod);
} // namespace kernel

View File

@ -77,6 +77,16 @@ bool HostKernelMod::Launch(const std::vector<AddressPtr> &, const std::vector<Ad
const std::vector<AddressPtr> &, void *) {
return true;
}
void HostKernelMod::InferOp() {
auto node = anf_node_.lock();
MS_EXCEPTION_IF_NULL(node);
if (!AnfAlgo::IsDynamicShape(node)) {
MS_LOG(EXCEPTION) << "The node is not dynamic shape.";
}
KernelMod::InferShape();
}
std::vector<TaskInfoPtr> HostKernelMod::GenTask(const std::vector<AddressPtr> &, const std::vector<AddressPtr> &,
const std::vector<AddressPtr> &, uint32_t) {
return {};

View File

@ -36,9 +36,10 @@ class HostKernelMod : public AscendKernelMod {
const std::vector<AddressPtr> &, uint32_t) override;
device::DynamicKernelPtr GenDynamicKernel(const CNodePtr &cnode_ptr, void *stream_ptr) override = 0;
bool Init(const AnfNodePtr &anf_node);
void InferOp() override;
void UpdateOp() override {}
protected:
AnfNodePtr anf_node_;
std::string op_name_;
std::vector<size_t> input_size_list_;
std::vector<size_t> output_size_list_;

View File

@ -0,0 +1,184 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "backend/kernel_compiler/kernel.h"
#include <algorithm>
#include <stack>
#include <utility>
#include "utils/ms_context.h"
#include "utils/anf_utils.h"
#include "utils/ms_device_shape_transfer.h"
#include "backend/session/anf_runtime_algorithm.h"
#include "backend/optimizer/common/helper.h"
namespace mindspore {
namespace kernel {
constexpr int64_t kInvalidShape = -2;
void KernelMod::InferShape() {
auto node = anf_node_.lock();
MS_EXCEPTION_IF_NULL(node);
if (!node->isa<CNode>()) {
MS_LOG(EXCEPTION) << "anfnode is not a cnode";
}
auto cnode = node->cast<CNodePtr>();
MS_EXCEPTION_IF_NULL(cnode);
MS_LOG(INFO) << "InferShape start, node:" << cnode->fullname_with_scope();
GetDependLists(cnode);
auto ret = InferShapeForDefiniteOutputNode(cnode);
if (ret) {
return;
}
depend_tensor_map_.clear();
auto inputs = cnode->inputs();
if (inputs.empty()) {
MS_LOG(EXCEPTION) << "Invalid inputs";
}
auto context = MsContext::GetInstance();
MS_EXCEPTION_IF_NULL(context);
AbstractBasePtrList args_spec_list;
auto primitive = GetValueNode<PrimitivePtr>(inputs[0]);
auto input_size = AnfAlgo::GetInputTensorNum(cnode);
std::vector<AnfNodePtr> input_nodes;
for (size_t i = 0; i < input_size; i++) {
auto input_node_with_index = AnfAlgo::GetPrevNodeOutput(cnode, i);
auto real_input = input_node_with_index.first;
MS_EXCEPTION_IF_NULL(real_input);
auto cnode_input = cnode->input(i + 1);
MS_EXCEPTION_IF_NULL(cnode_input);
InferShapeForNopNode(&real_input);
if (depend_list_.find(i) != depend_list_.end()) {
auto pre_node_with_index = AnfAlgo::GetPrevNodeOutput(cnode, i);
bool skip_nop_node = !context->get_param<bool>(MS_CTX_ENABLE_MINDRT);
auto output_addr = AnfAlgo::GetPrevNodeMutableOutputAddr(cnode, i, skip_nop_node);
std::vector<int64_t> shapes =
trans::GetRuntimePaddingShape(pre_node_with_index.first, pre_node_with_index.second);
auto host_type = AnfAlgo::GetOutputInferDataType(pre_node_with_index.first, pre_node_with_index.second);
auto out_tensor = std::make_shared<tensor::Tensor>(host_type, shapes);
MS_EXCEPTION_IF_NULL(out_tensor);
// The second parameter must be false, otherwise the device address cannot be released and allocated, and the
// address size will be wrong in the dynamic shape scenario.
out_tensor->set_device_address(output_addr, false);
auto ret2 = depend_tensor_map_.try_emplace(i, out_tensor);
if (!ret2.second) {
MS_LOG(EXCEPTION) << "Insert map failed";
}
out_tensor->data_sync();
auto lock = AnfUtils::GetAbstractLock(real_input.get());
MS_EXCEPTION_IF_NULL(real_input->abstract());
auto real_abs = real_input->abstract()->Clone();
if (real_abs->isa<abstract::AbstractTensor>()) {
real_abs->set_value(out_tensor);
} else if (real_abs->isa<abstract::AbstractTuple>()) {
auto tuple_get_item_index = AnfAlgo::GetTupleGetItemOutIndex(cnode_input->cast<CNodePtr>());
auto abstract_tuple = real_abs->cast<abstract::AbstractTuplePtr>();
MS_EXCEPTION_IF_NULL(abstract_tuple);
auto tuple_elements = abstract_tuple->elements()[tuple_get_item_index];
tuple_elements->set_value(out_tensor);
}
real_input->set_abstract(real_abs);
}
bool is_cnode_input = AnfAlgo::AddArgList(&args_spec_list, cnode_input, real_input, i);
if (is_cnode_input) {
input_nodes.push_back(cnode_input);
} else {
input_nodes.push_back(real_input);
}
}
std::vector<AbstractScope> locks;
std::transform(input_nodes.begin(), input_nodes.end(), std::back_inserter(locks),
[](const AnfNodePtr &input) { return AnfUtils::GetAbstractLock(input.get()); });
auto eval_result = opt::CppInferShape(primitive, args_spec_list);
locks.clear();
// cppcheck-suppress unreadVariable
auto lock = AnfUtils::GetAbstractLock(cnode.get());
cnode->set_abstract(eval_result);
}
bool KernelMod::InferShapeForDefiniteOutputNode(const CNodePtr &cnode) {
MS_EXCEPTION_IF_NULL(cnode);
if (!AnfAlgo::CheckPrimitiveType(cnode, prim::kPrimShape)) {
return false;
}
auto input_size = AnfAlgo::GetInputTensorNum(cnode);
if (input_size != 1) {
MS_LOG(EXCEPTION) << "Node should have only one input: " << cnode->fullname_with_scope();
}
auto cur_shape = dynamic_cast<mindspore::abstract::Shape *>(cnode->Shape().get())->shape();
if (std::any_of(cur_shape.begin(), cur_shape.end(), [](int64_t x) { return x == kInvalidShape; })) {
return false;
}
std::vector<int64_t> output_shape = {static_cast<int64_t>(cur_shape.size())};
mindspore::abstract::BaseShapePtr shape = std::make_shared<mindspore::abstract::Shape>(output_shape);
auto lock = AnfUtils::GetAbstractLock(cnode.get());
auto abstract = cnode->abstract()->Clone();
MS_EXCEPTION_IF_NULL(abstract);
abstract->set_shape(shape);
cnode->set_abstract(abstract);
return true;
}
void KernelMod::InferShapeForNopNode(AnfNodePtr *input_node) {
MS_EXCEPTION_IF_NULL(*input_node);
if (!opt::IsNopNode(*input_node) || !AnfAlgo::IsDynamicShape(*input_node)) {
MS_LOG(INFO) << "Input node is not a dynamic-shape nop node, no need to infer.";
return;
}
MS_LOG(INFO) << "Infer shape for nop node.";
std::stack<AnfNodePtr> nop_road;
nop_road.push(*input_node);
/*lint -e716*/
while (true) {
auto input_node_with_idx = AnfAlgo::GetPrevNodeOutput(*input_node, 0);
auto in_node = input_node_with_idx.first;
MS_EXCEPTION_IF_NULL(in_node);
if (opt::IsNopNode(in_node)) {
nop_road.push(in_node);
*input_node = in_node;
} else {
break;
}
}
/*lint +e716*/
while (!nop_road.empty()) {
auto nop_node = nop_road.top();
MS_EXCEPTION_IF_NULL(nop_node);
AnfAlgo::InferShape(nop_node->cast<CNodePtr>());
nop_road.pop();
}
}
void KernelMod::GetDependLists(const CNodePtr &cnode) {
MS_EXCEPTION_IF_NULL(cnode);
if (depend_list_.size() != 0) {
return;
}
auto ret = abstract::GetDependsFormMap(cnode);
if (ret.empty()) {
MS_LOG(DEBUG) << "No dynamic_shape_depends found";
return;
}
MS_LOG(INFO) << "Have depends";
(void)std::transform(ret.begin(), ret.end(), std::inserter(depend_list_, depend_list_.begin()),
[](const int64_t &value) { return static_cast<int>(value); });
MS_LOG(INFO) << "Init End";
}
} // namespace kernel
} // namespace mindspore
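Note: KernelMod::InferShape pulls value-depended inputs back to the host (data_sync) and records them in depend_tensor_map_ before pinning their values into the input abstracts and calling CppInferShape. A simplified, self-contained sketch of that bookkeeping, with DeviceTensor as a hypothetical stand-in for tensor::Tensor:

#include <cstdint>
#include <map>
#include <memory>
#include <set>
#include <stdexcept>
#include <vector>

// Hypothetical device-backed tensor that can be synced to host memory.
struct DeviceTensor {
  std::vector<int64_t> shape;
  void data_sync() { /* copy the device buffer to host */ }
};

using DependTensorMap = std::map<uint32_t, std::shared_ptr<DeviceTensor>>;

// Collect host copies of the inputs whose *values* (not just shapes)
// feed shape inference, keyed by input index.
DependTensorMap CollectDependInputs(const std::set<uint32_t> &depend_list,
                                    const std::vector<std::shared_ptr<DeviceTensor>> &inputs) {
  DependTensorMap depend_tensor_map;
  for (uint32_t i : depend_list) {
    if (i >= inputs.size()) throw std::out_of_range("depend index out of range");
    auto tensor = inputs[i];
    tensor->data_sync();  // inference needs the actual values on the host
    if (!depend_tensor_map.try_emplace(i, tensor).second) {
      throw std::runtime_error("Insert map failed");
    }
  }
  return depend_tensor_map;
}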

View File

@ -18,6 +18,8 @@
#include <vector>
#include <string>
#include <memory>
#include <map>
#include <set>
#include "nlohmann/json.hpp"
#include "ir/anf.h"
#include "ir/dtype.h"
@ -180,6 +182,8 @@ struct KernelLaunchInfo {
class KernelMod {
public:
KernelMod() {}
explicit KernelMod(const AnfNodePtr &anf_node_ptr) : anf_node_(anf_node_ptr) {}
virtual const std::vector<size_t> &GetInputSizeList() const = 0;
virtual const std::vector<size_t> &GetOutputSizeList() const = 0;
virtual const std::vector<size_t> &GetWorkspaceSizeList() const = 0;
@ -193,6 +197,10 @@ class KernelMod {
virtual std::vector<size_t> GenParameters() { return {}; }
virtual void ReleaseResource() {}
virtual void InferOp() {}
virtual void InitOp() {}
virtual void UpdateOp() {}
virtual ~KernelMod() = default;
void set_unique_name(const std::string &unique_name) { unique_name_ = unique_name; }
void set_fullname(const std::string &fullname) { fullname_ = fullname; }
@ -205,18 +213,29 @@ class KernelMod {
const std::vector<AddressPtr> &GetOutputsAddr() { return outputs_addr_; }
void SetStream(void *stream) { stream_ = stream; }
void *GetStream() const { return stream_; }
void SetAtomicCleanNodes(const std::vector<CNodePtr> &atomic_clean_node) { atomic_clean_nodes_ = atomic_clean_node; }
protected:
void InferShape();
std::string kernel_name_;
std::string unique_name_;
std::string fullname_;
bool is_monad_{false};
void *stream_{nullptr};
AnfNodeWeakPtr anf_node_;
std::map<uint32_t, tensor::TensorPtr> depend_tensor_map_;
std::vector<CNodePtr> atomic_clean_nodes_;
private:
void InferShapeForNopNode(AnfNodePtr *input_node);
void GetDependLists(const CNodePtr &cnode);
bool InferShapeForDefiniteOutputNode(const CNodePtr &cnode);
std::vector<AddressPtr> inputs_addr_;
std::vector<AddressPtr> workspaces_addr_;
std::vector<AddressPtr> outputs_addr_;
std::set<uint32_t> depend_list_;
};
using KernelModPtr = std::shared_ptr<KernelMod>;
} // namespace kernel

View File

@ -0,0 +1,298 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "backend/kernel_compiler/tbe/dynamic_tbe_kernel_mod.h"
#include <algorithm>
#include <stack>
#include "acl/acl_rt.h"
#include "utils/ms_context.h"
#include "backend/session/anf_runtime_algorithm.h"
#include "runtime/device/kernel_runtime.h"
#include "backend/optimizer/common/helper.h"
#include "framework/common/debug/log.h"
#include "utils/log_adapter.h"
#include "utils/convert_utils_base.h"
#include "runtime/device/kernel_runtime_manager.h"
#include "runtime/kernel.h"
#include "runtime/mem.h"
#include "pipeline/jit/static_analysis/static_analysis.h"
#include "runtime/device/ascend/executor/tiling/op_tiling_adapter.h"
#include "utils/ms_device_shape_transfer.h"
#include "utils/utils.h"
#include "register/op_tiling.h"
#include "nlohmann/json.hpp"
namespace mindspore {
namespace kernel {
using TbeTaskInfoPtr = std::shared_ptr<mindspore::ge::model_runner::TbeTaskInfo>;
using tbe::KernelManager;
using AddressPtrList = std::vector<mindspore::kernel::AddressPtr>;
DynamicTbeKernelMod::DynamicTbeKernelMod(KernelPackPtr kernel_pack, const AnfNodePtr &anf_node_ptr)
: TbeKernelMod(std::move(kernel_pack), anf_node_ptr) {
MS_EXCEPTION_IF_NULL(anf_node_ptr);
auto cnode = anf_node_ptr->cast<CNodePtr>();
if (cnode != nullptr) {
op_compile_info_ = ParseCompileJson(cnode);
}
}
DynamicTbeKernelMod::~DynamicTbeKernelMod() {
if (tiling_data_ptr_ != nullptr) {
(void)rtFree(tiling_data_ptr_);
}
}
void DynamicTbeKernelMod::InferOp() {
if (AnfAlgo::IsDynamicShape(anf_node_.lock())) {
auto node = anf_node_.lock();
MS_EXCEPTION_IF_NULL(node);
auto cnode = node->cast<CNodePtr>();
MS_EXCEPTION_IF_NULL(cnode);
need_skip_execute_ = NeedSkipExecute(cnode);
if (need_skip_execute_) {
std::vector<TypeId> dtypes{AnfAlgo::GetOutputInferDataType(cnode, 0)};
AnfAlgo::SetOutputInferTypeAndShape(dtypes, {AnfAlgo::GetInputDeviceShape(cnode, 0)}, cnode.get());
} else {
KernelMod::InferShape();
}
}
}
void DynamicTbeKernelMod::InitOp() {
auto node = anf_node_.lock();
MS_EXCEPTION_IF_NULL(node);
auto cnode = node->cast<CNodePtr>();
MS_EXCEPTION_IF_NULL(cnode);
if (!AnfAlgo::IsDynamicShape(cnode)) {
MS_LOG(EXCEPTION) << "The node is not dynamic shape: " << cnode->fullname_with_scope();
}
if (!atomic_clean_nodes_.empty()) {
for (const auto &atomic_clean_node : atomic_clean_nodes_) {
AnfAlgo::GetKernelMod(atomic_clean_node)->InitOp();
}
}
if (need_skip_execute_) {
return;
}
// gen FuncStub
if (handle_ == nullptr) {
auto func_stub = KernelManager::GenFuncStub(*kernel_pack_, false, &block_dim_, true, &handle_, &origin_key_);
if (func_stub != 1) {
MS_LOG(EXCEPTION) << "GenFuncStub failed.";
}
}
// start compute tiling
MS_LOG(INFO) << "Start compute tiling of: " << cnode->fullname_with_scope();
optiling::utils::OpRunInfo op_run_info_v2(-1, true, 0);
device::tiling::OpTilingCalculateAdapter converter;
::ge::ComputeGraphPtr ge_graph = std::make_shared<::ge::ComputeGraph>("default");
auto ge_node = converter.AnfNodeToGeNodeAdapter(cnode, &ge_graph, depend_tensor_map_, op_compile_info_);
(void)optiling::OpParaCalculateV2(ge_node, op_run_info_v2);
block_dim_ = op_run_info_v2.GetBlockDim();
std::vector<int64_t> workspace_size_list;
op_run_info_v2.GetAllWorkspaces(workspace_size_list);
tiling_data_ = op_run_info_v2.GetAllTilingData().str();
tiling_key_ = op_run_info_v2.GetTilingKey();
workspace_size_list_.clear();
workspace_size_list_.resize(workspace_size_list.size());
std::transform(workspace_size_list.begin(), workspace_size_list.end(), workspace_size_list_.begin(),
[](int64_t size) { return static_cast<size_t>(size); });
}
std::string DynamicTbeKernelMod::ParseCompileJson(const CNodePtr &cnode) {
MS_EXCEPTION_IF_NULL(cnode);
bool get_flag = true;
std::string op_compile_info = "";
TbeUtils::GetCompileInfo(cnode, &op_compile_info, &get_flag);
if (!get_flag) {
MS_LOG(EXCEPTION) << "Get compile_info failed. The compile result of [" << cnode->fullname_with_scope()
<< "] maybe not in the json file(kernel_meta/) or the file had been deleted.";
}
MS_LOG(INFO) << "Node: " << cnode->fullname_with_scope() << " get compile_info: " << op_compile_info;
return op_compile_info;
}
void DynamicTbeKernelMod::InitTilingDataPtr() {
if (tiling_data_ptr_ != nullptr) {
return;
}
auto kernel_json_info = kernel_pack_->kernel_json_info();
auto op_para_size = kernel_json_info.op_para_size;
if (op_para_size > 0) {
auto ret = rtMalloc(&tiling_data_ptr_, op_para_size, RT_MEMORY_HBM);
if (ret != RT_ERROR_NONE) {
MS_LOG(EXCEPTION) << "rtMalloc tiling data failed";
}
}
}
bool DynamicTbeKernelMod::CopyTilingToDevice(void *stream_ptr) {
InitTilingDataPtr();
MS_EXCEPTION_IF_NULL(kernel_pack_);
auto kernel_json_info = kernel_pack_->kernel_json_info();
auto op_para_size = kernel_json_info.op_para_size;
if (tiling_data_.size() > op_para_size) {
MS_LOG(EXCEPTION) << "Compute tiling size:" << tiling_data_.size()
<< " larger than tbe build op_para_size:" << op_para_size;
}
if (tiling_data_.empty() || tiling_data_ptr_ == nullptr) {
MS_LOG(INFO) << "Tiling size is 0, skip aclrtMemcpyAsync";
return true;
}
// cppcheck-suppress unreadVariable
auto lock = AscendKernelMod::LockRuntime();
auto ret = aclrtMemcpyAsync(tiling_data_ptr_, op_para_size, tiling_data_.c_str(), tiling_data_.size(),
ACL_MEMCPY_HOST_TO_DEVICE, stream_ptr);
if (ret != RT_ERROR_NONE) {
MS_LOG(EXCEPTION) << "Tiling aclrtMemcpyAsync failed, ret:" << ret;
}
return true;
}
bool DynamicTbeKernelMod::NeedSkipExecute(const CNodePtr &cnode) {
// Skip running ReduceSum when the axes input is an empty tensor
MS_EXCEPTION_IF_NULL(cnode);
auto op_name = AnfAlgo::GetCNodeName(cnode);
if (op_name != kReduceSumOpName) {
return false;
}
const size_t axes_index = 1;
if (cnode->inputs().size() <= axes_index + 1) {
return false;
}
auto input_axes = cnode->input(axes_index + 1);
// cppcheck-suppress unreadVariable
auto lock = AnfUtils::GetAbstractLock(input_axes.get());
auto axes_abs = input_axes->abstract()->Clone();
MS_EXCEPTION_IF_NULL(axes_abs);
auto axes_shape = AnfAlgo::GetInputDeviceShape(cnode, axes_index);
if (axes_abs->isa<abstract::AbstractTensor>()) {
if (std::any_of(axes_shape.begin(), axes_shape.end(), [](ssize_t shape) { return shape == 0; })) {
return true;
}
}
return false;
}
bool DynamicTbeKernelMod::Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs, void *stream_ptr) {
if (stream_ptr == nullptr) {
MS_LOG(ERROR) << "stream_ptr should not be nullptr.";
return false;
}
if (kernel_pack_ == nullptr) {
MS_LOG(ERROR) << "kernel pack should not be nullptr.";
return false;
}
if (stream_ == nullptr) {
stream_ = stream_ptr;
}
auto node = anf_node_.lock();
MS_EXCEPTION_IF_NULL(node);
if (!node->isa<CNode>()) {
MS_LOG(EXCEPTION) << "anfnode is not a cnode";
}
auto cnode = node->cast<CNodePtr>();
MS_EXCEPTION_IF_NULL(cnode);
// is dynamic shape
if (!AnfAlgo::IsDynamicShape(cnode)) {
MS_LOG(EXCEPTION) << "The cnode is not dynamic shape:" << cnode->fullname_with_scope();
}
if (!atomic_clean_nodes_.empty()) {
for (auto atomic_clean_node : atomic_clean_nodes_) {
KernelLaunchInfo kernel_launch_info;
auto kernel_mod = AnfAlgo::GetKernelMod(atomic_clean_node);
MS_EXCEPTION_IF_NULL(kernel_mod);
device::KernelRuntime::GenLaunchArgs(*kernel_mod, atomic_clean_node, &kernel_launch_info);
auto atomic_inputs = kernel_launch_info.inputs_;
std::vector<AddressPtr> atomic_outputs;
std::vector<AddressPtr> atomic_workspace;
kernel_mod->Launch(atomic_inputs, atomic_workspace, atomic_outputs, stream_ptr);
}
}
// Need to skip the launch: ReduceSum with an empty axes input
if (need_skip_execute_) {
// Skip reduce if axis is an empty Tensor (shape = 0)
MS_LOG(INFO) << "The node " << cnode->fullname_with_scope() << " needs to be skipped.";
// cppcheck-suppress unreadVariable
auto lock = AscendKernelMod::LockRuntime();
rtError_t status = aclrtMemcpyAsync(outputs[0]->addr, inputs[0]->size, inputs[0]->addr, inputs[0]->size,
ACL_MEMCPY_DEVICE_TO_DEVICE, stream_ptr);
if (status != RT_ERROR_NONE) {
MS_LOG(EXCEPTION) << "aclrtMemcpyAsync failed for " << cnode->fullname_with_scope();
}
MS_LOG(INFO) << "Execute node:" << cnode->fullname_with_scope() << " success.";
return true;
}
// copy tiling to device
if (!CopyTilingToDevice(stream_ptr)) {
MS_LOG(EXCEPTION) << "Copy tiling to device failed. op name: " << cnode->fullname_with_scope();
}
// pack all addresses into a vector.
std::vector<void *> runtimeargs;
(void)std::transform(std::begin(inputs), std::end(inputs), std::back_inserter(runtimeargs),
[](const AddressPtr &input) -> void * { return input->addr; });
(void)std::transform(std::begin(outputs), std::end(outputs), std::back_inserter(runtimeargs),
[](const AddressPtr &output) -> void * { return output->addr; });
if (!workspace.empty()) {
(void)std::transform(std::begin(workspace), std::end(workspace), std::back_inserter(runtimeargs),
[](const AddressPtr &addr) -> void * { return addr->addr; });
}
if (!tiling_data_.empty() && tiling_data_ptr_ != nullptr) {
runtimeargs.push_back(tiling_data_ptr_);
}
rtL2Ctrl_t *l2ctrl = nullptr;
auto args_size = static_cast<uint32_t>(UlongToUint(sizeof(void *)) * runtimeargs.size());
auto node_info = cnode->fullname_with_scope();
const auto dev_func =
origin_key_.find("kernel0") != origin_key_.npos ? origin_key_ : origin_key_ + "_" + std::to_string(tiling_key_);
const auto kernel_info = node_info + "/" + std::to_string(tiling_key_);
// cppcheck-suppress unreadVariable
auto lock = AscendKernelMod::LockRuntime();
auto ret = rtKernelLaunchWithHandle(handle_, dev_func.c_str(), block_dim_, runtimeargs.data(), args_size, l2ctrl,
stream_ptr, kernel_info.c_str());
if (ret != RT_ERROR_NONE) {
MS_LOG(ERROR) << "Call runtime rtKernelLaunchWithHandle error. Node info: " << node_info;
return false;
}
return true;
}
} // namespace kernel
} // namespace mindspore
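Note: on every dynamic-shape launch, the TBE kernel recomputes tiling on the host (OpParaCalculateV2), copies the tiling blob into a preallocated device buffer, and appends that buffer's address after the input/output/workspace addresses in the runtime argument list. A hedged sketch of the argument packing (device allocation and the async memcpy elided):

#include <cstddef>
#include <string>
#include <vector>

struct Address {
  void *addr;
  size_t size;
};

// Pack input/output/workspace addresses plus the optional tiling pointer
// into the flat argument vector handed to the kernel launch, mirroring
// the std::transform chain in DynamicTbeKernelMod::Launch.
std::vector<void *> PackRuntimeArgs(const std::vector<Address> &inputs,
                                    const std::vector<Address> &outputs,
                                    const std::vector<Address> &workspace,
                                    const std::string &tiling_data,
                                    void *tiling_data_ptr) {
  std::vector<void *> args;
  for (const auto &in : inputs) args.push_back(in.addr);
  for (const auto &out : outputs) args.push_back(out.addr);
  for (const auto &ws : workspace) args.push_back(ws.addr);
  if (!tiling_data.empty() && tiling_data_ptr != nullptr) {
    args.push_back(tiling_data_ptr);  // device copy of the tiling blob
  }
  return args;
}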

View File

@ -0,0 +1,65 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_TBE_DYNAMIC_TBE_KERNEL_MOD_H_
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_TBE_DYNAMIC_TBE_KERNEL_MOD_H_
#include <memory>
#include <string>
#include <vector>
#include <utility>
#include <map>
#include "backend/kernel_compiler/tbe/tbe_kernel_mod.h"
#include "backend/kernel_compiler/tbe/tbe_utils.h"
#include "runtime/device/device_address.h"
#include "ir/tensor.h"
namespace mindspore {
namespace kernel {
class DynamicTbeKernelMod : public TbeKernelMod {
public:
explicit DynamicTbeKernelMod(KernelPackPtr kernel_pack) : TbeKernelMod(kernel_pack) {} // maybe delete later
DynamicTbeKernelMod(KernelPackPtr kernel_pack, const AnfNodePtr &anf_node_ptr);
~DynamicTbeKernelMod() override;
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs, void *stream_ptr) override;
void InferOp() override;
void InitOp() override;
private:
void InferShapeRecursive();
void InferShapeForNopNode(AnfNodePtr *input_node);
std::string ParseCompileJson(const CNodePtr &cnode);
void InitTilingDataPtr();
bool CopyTilingToDevice(void *stream_ptr);
bool NeedSkipExecute(const CNodePtr &cnode);
uint32_t block_dim_ = 1;
std::string tiling_data_;
void *tiling_data_ptr_ = nullptr;
uint32_t tiling_key_{0};
void *handle_ = nullptr;
std::string origin_key_{""};
std::string op_compile_info_{};
bool need_skip_execute_ = false;
};
using DynamicTbeKernelModPtr = std::shared_ptr<DynamicTbeKernelMod>;
} // namespace kernel
} // namespace mindspore
#endif  // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_TBE_DYNAMIC_TBE_KERNEL_MOD_H_

View File

@ -15,6 +15,8 @@
*/
#include "backend/kernel_compiler/tbe/tbe_kernel_mod.h"
#include <algorithm>
#include "runtime/rt.h"
#include "utils/ms_context.h"
#include "runtime/device/ascend/ge_runtime/task_info.h"
@ -41,6 +43,20 @@ bool TbeKernelMod::Launch(const std::vector<mindspore::kernel::AddressPtr> &inpu
if (stream_ == nullptr) {
stream_ = stream_ptr;
}
// launch atomic_cleans first
if (!atomic_clean_nodes_.empty()) {
for (const auto &atomic_clean_node : atomic_clean_nodes_) {
KernelLaunchInfo kernel_launch_info;
auto kernel_mod = AnfAlgo::GetKernelMod(atomic_clean_node);
MS_EXCEPTION_IF_NULL(kernel_mod);
device::KernelRuntime::GenLaunchArgs(*kernel_mod, atomic_clean_node, &kernel_launch_info);
auto atomic_inputs = kernel_launch_info.inputs_;
std::vector<AddressPtr> atomic_outputs;
std::vector<AddressPtr> atomic_workspace;
kernel_mod->Launch(atomic_inputs, atomic_workspace, atomic_outputs, stream_ptr);
}
}
uint32_t blockdim = 1; // default blockdim equal to 1.
auto func_stub = KernelManager::GenFuncStub(*kernel_pack_, false, &blockdim);
if (func_stub == 0) {
@ -61,6 +77,7 @@ bool TbeKernelMod::Launch(const std::vector<mindspore::kernel::AddressPtr> &inpu
rtL2Ctrl_t *l2ctrl = nullptr;
const void *stubFunc = reinterpret_cast<void *>(func_stub);
auto argsSize = static_cast<uint32_t>(UlongToUint(sizeof(void *)) * runtimeargs.size());
auto lock = AscendKernelMod::LockRuntime();
auto ret = rtKernelLaunch(stubFunc, blockdim, runtimeargs.data(), argsSize, l2ctrl, stream_);
if (ret != RT_ERROR_NONE) {
MS_LOG(ERROR) << "Call runtime rtKernelLaunch error.";

View File

@ -29,6 +29,8 @@ namespace kernel {
class TbeKernelMod : public AscendKernelMod {
public:
explicit TbeKernelMod(KernelPackPtr kernel_pack) : kernel_pack_(std::move(kernel_pack)) {}
TbeKernelMod(KernelPackPtr kernel_pack, const AnfNodePtr &anf_node_ptr)
: AscendKernelMod(anf_node_ptr), kernel_pack_(std::move(kernel_pack)) {}
~TbeKernelMod() override = default;
void SetInputSizeList(const std::vector<size_t> &size_list) { input_size_list_ = size_list; }
@ -45,7 +47,7 @@ class TbeKernelMod : public AscendKernelMod {
device::DynamicKernelPtr GenDynamicKernel(const CNodePtr &cnode_ptr, void *stream_ptr) override;
std::vector<size_t> GenParameters() override;
private:
protected:
KernelPackPtr kernel_pack_;
std::vector<size_t> input_size_list_;
std::vector<size_t> output_size_list_;

View File

@ -729,9 +729,8 @@ KernelWithIndex AnfRuntimeAlgorithm::GetPrevNodeOutput(const AnfNodePtr &anf_nod
auto kernel_info = anf_node->kernel_info();
if (kernel_info) {
auto runtime_cache = kernel_info->runtime_cache();
MS_EXCEPTION_IF_NULL(runtime_cache);
if (runtime_cache->is_valid()) {
auto output = runtime_cache->get_prev_node_output(input_idx);
if (runtime_cache.runtime_cache().is_valid()) {
auto output = runtime_cache.runtime_cache().get_prev_node_output(input_idx);
if (output.first != nullptr) {
return output;
}
@ -747,9 +746,8 @@ KernelWithIndex AnfRuntimeAlgorithm::GetPrevNodeOutput(const AnfNodePtr &anf_nod
}
if (kernel_info) {
auto runtime_cache = kernel_info->runtime_cache();
MS_EXCEPTION_IF_NULL(runtime_cache);
if (runtime_cache->is_valid()) {
runtime_cache->set_prev_node_output(input_idx, res);
if (runtime_cache.runtime_cache().is_valid()) {
runtime_cache.runtime_cache().set_prev_node_output(input_idx, res);
}
}
return res;
@ -2065,7 +2063,7 @@ std::vector<int64_t> AnfRuntimeAlgorithm::GetOutputMinShape(const AnfNodePtr &an
}
}
bool IsNodeInputDynamicShape(const CNodePtr &anf_node_ptr) {
bool AnfRuntimeAlgorithm::IsNodeInputDynamicShape(const CNodePtr &anf_node_ptr) {
MS_EXCEPTION_IF_NULL(anf_node_ptr);
auto input_num = AnfAlgo::GetInputTensorNum(anf_node_ptr);
for (size_t i = 0; i < input_num; ++i) {
@ -2274,6 +2272,7 @@ void AnfRuntimeAlgorithm::InferShape(const CNodePtr &node, std::map<uint32_t, te
AbstractBasePtrList args_spec_list;
auto primitive = GetValueNode<PrimitivePtr>(inputs[0]);
auto input_size = AnfAlgo::GetInputTensorNum(node);
std::vector<AnfNodePtr> input_nodes;
for (size_t i = 0; i < input_size; ++i) {
auto input_with_index = AnfAlgo::GetPrevNodeOutput(node, i);
auto real_input = input_with_index.first;
@ -2289,9 +2288,12 @@ void AnfRuntimeAlgorithm::InferShape(const CNodePtr &node, std::map<uint32_t, te
// sync data from device to host
tensor_ptr->data_sync();
}
auto real_abs = real_input->abstract();
// cppcheck-suppress unreadVariable
auto lock = AnfUtils::GetAbstractLock(real_input.get());
MS_EXCEPTION_IF_NULL(real_input->abstract());
auto real_abs = real_input->abstract()->Clone();
if (real_abs->isa<abstract::AbstractTensor>()) {
real_input->abstract()->set_value(tensor_ptr);
real_abs->set_value(tensor_ptr);
} else if (real_abs->isa<abstract::AbstractTuple>()) {
auto tuple_get_item_index = AnfAlgo::GetTupleGetItemOutIndex(cnode_input->cast<CNodePtr>());
auto abstract_tuple = real_abs->cast<abstract::AbstractTuplePtr>();
@ -2299,15 +2301,27 @@ void AnfRuntimeAlgorithm::InferShape(const CNodePtr &node, std::map<uint32_t, te
auto tuple_elements = abstract_tuple->elements()[tuple_get_item_index];
tuple_elements->set_value(tensor_ptr);
}
real_input->set_abstract(real_abs);
}
}
AddArgList(&args_spec_list, cnode_input, real_input, i);
bool is_cnode_input = AddArgList(&args_spec_list, cnode_input, real_input, i);
if (is_cnode_input) {
input_nodes.push_back(cnode_input);
} else {
input_nodes.push_back(real_input);
}
}
std::vector<AbstractScope> locks;
std::transform(input_nodes.begin(), input_nodes.end(), std::back_inserter(locks),
[](const AnfNodePtr &input) { return AnfUtils::GetAbstractLock(input.get()); });
auto eval_result = opt::CppInferShape(primitive, args_spec_list);
locks.clear();
// cppcheck-suppress unreadVariable
auto lock = AnfUtils::GetAbstractLock(node.get());
node->set_abstract(eval_result);
}
void AnfRuntimeAlgorithm::AddArgList(AbstractBasePtrList *args_spec_list, const AnfNodePtr &cnode_input,
bool AnfRuntimeAlgorithm::AddArgList(AbstractBasePtrList *args_spec_list, const AnfNodePtr &cnode_input,
const AnfNodePtr &real_input, size_t index) {
if (AnfAlgo::CheckPrimitiveType(cnode_input, prim::kPrimTupleGetItem)) {
auto base_shape = real_input->Shape();
@ -2315,15 +2329,24 @@ void AnfRuntimeAlgorithm::AddArgList(AbstractBasePtrList *args_spec_list, const
MS_LOG(EXCEPTION) << "Node input is a tuple_get_item but real input node shape is not a TupleShape. trace: "
<< trace::DumpSourceLines(real_input);
}
// cppcheck-suppress unreadVariable
auto lock = AnfUtils::GetAbstractLock(real_input.get());
auto abs = real_input->abstract()->cast<abstract::AbstractTuplePtr>();
MS_EXCEPTION_IF_NULL(abs);
auto tuple_get_item_index = AnfAlgo::GetTupleGetItemOutIndex(cnode_input->cast<CNodePtr>());
auto abs_i = abs->elements()[tuple_get_item_index];
(void)args_spec_list->emplace_back(abs_i);
return false;
} else if (cnode_input->isa<CNode>() && AnfAlgo::GetCNodeName(cnode_input) == prim::kPrimReshape->name()) {
// cppcheck-suppress unreadVariable
auto lock = AnfUtils::GetAbstractLock(cnode_input.get());
(void)args_spec_list->emplace_back(cnode_input->abstract());
return true;
} else {
// cppcheck-suppress unreadVariable
auto lock = AnfUtils::GetAbstractLock(real_input.get());
(void)args_spec_list->emplace_back(real_input->abstract());
return false;
}
}
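
One detail in the InferShape change above is worth spelling out: the input's abstract is cloned before the synced tensor value is written into it, and only then published back to the node. A minimal sketch of the pattern (the shared-abstract rationale is assumed, not stated in the diff):

auto abs = real_input->abstract()->Clone();  // private copy, so other holders of the abstract are untouched
abs->set_value(tensor_ptr);                  // attach the host tensor just synced from device
real_input->set_abstract(abs);               // publish under the node's abstract lock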

View File

@ -288,6 +288,7 @@ class AnfRuntimeAlgorithm {
static TypeId GetCNodeOutputPrecision(const AnfNodePtr &node);
// Get the fixed output precision from the previous node; input_idx is the input index of the current node relative to that node.
static TypeId GetPrevNodeOutputPrecision(const AnfNodePtr &node, size_t input_idx);
static bool IsNodeInputDynamicShape(const CNodePtr &anf_node_ptr);
static bool IsDynamicShape(const AnfNodePtr &node);
static bool HasDynamicShapeFlag(const PrimitivePtr &prim);
static bool IsCondControlKernel(const CNodePtr &node);
@ -302,7 +303,8 @@ class AnfRuntimeAlgorithm {
static bool IsNodeDynamicShape(const AnfNodePtr &node);
static bool IsHostKernel(const CNodePtr &node);
static void InferShape(const CNodePtr &node, std::map<uint32_t, tensor::TensorPtr> *depend_tensors = nullptr);
static void AddArgList(AbstractBasePtrList *args_spec_list, const AnfNodePtr &cnode_input,
// Returns true if cnode_input's abstract is used, false if real_input's abstract is used.
static bool AddArgList(AbstractBasePtrList *args_spec_list, const AnfNodePtr &cnode_input,
const AnfNodePtr &real_input, size_t index);
static std::vector<size_t> GetInputRealDeviceShapeIfExist(const AnfNodePtr &anf_node, size_t index);
static std::vector<size_t> GetOutputRealDeviceShapeIfExist(const AnfNodePtr &anf_node, size_t index);

View File

@ -123,8 +123,7 @@ void AscendEnableDynamicRuntimeCache(const KernelGraph *graph) {
}
MS_EXCEPTION_IF_NULL(kernel_info);
auto runtime_cache = kernel_info->runtime_cache();
MS_EXCEPTION_IF_NULL(runtime_cache);
runtime_cache->set_valid();
runtime_cache.runtime_cache().set_valid();
}
}
} // namespace

View File

@ -37,21 +37,21 @@ class OpTilingCalculateAdapter {
OpTilingCalculateAdapter() = default;
~OpTilingCalculateAdapter() = default;
ge::Operator AnfNodeToGeNodeAdapter(const CNodePtr &node, ge::ComputeGraphPtr *ge_graph,
::ge::Operator AnfNodeToGeNodeAdapter(const CNodePtr &node, ::ge::ComputeGraphPtr *ge_graph,
const std::map<uint32_t, tensor::TensorPtr> &depend_tensor_map,
const std::string &op_compile_info);
private:
void ConvertInputShapeAndType(const CNodePtr &node, ge::OpDescPtr *op_desc);
void ConvertOutputShapeAndType(const CNodePtr &node, ge::OpDescPtr *op_desc);
void ConvertCompileInfo(const CNodePtr &node, ge::OpDescPtr *op_desc);
void ConvertAttrs(const CNodePtr &node, ge::OpDescPtr *op_desc);
std::vector<std::tuple<std::size_t, ge::NodePtr>> ConvertDepends(
const CNodePtr &node, const std::map<uint32_t, tensor::TensorPtr> &depend_tensor_map, ge::OpDescPtr *op_desc,
ge::ComputeGraphPtr *ge_graph);
ge::NodePtr NewConstantOp(const CNodePtr &node, const std::string &name, const tensor::TensorPtr &tensor_data,
ge::ComputeGraphPtr *ge_graph, size_t index);
void AddEdge(const ge::NodePtr &ge_node, const std::vector<std::tuple<std::size_t, ge::NodePtr>> &constant_ops);
void ConvertInputShapeAndType(const CNodePtr &node, ::ge::OpDescPtr *op_desc);
void ConvertOutputShapeAndType(const CNodePtr &node, ::ge::OpDescPtr *op_desc);
void ConvertCompileInfo(const CNodePtr &node, ::ge::OpDescPtr *op_desc);
void ConvertAttrs(const CNodePtr &node, ::ge::OpDescPtr *op_desc);
std::vector<std::tuple<std::size_t, ::ge::NodePtr>> ConvertDepends(
const CNodePtr &node, const std::map<uint32_t, tensor::TensorPtr> &depend_tensor_map, ::ge::OpDescPtr *op_desc,
::ge::ComputeGraphPtr *ge_graph);
::ge::NodePtr NewConstantOp(const CNodePtr &node, const std::string &name, const tensor::TensorPtr &tensor_data,
::ge::ComputeGraphPtr *ge_graph, size_t index);
void AddEdge(const ::ge::NodePtr &ge_node, const std::vector<std::tuple<std::size_t, ::ge::NodePtr>> &constant_ops);
std::string GetRealOpType(const std::string &op_type);
std::string GetInputName(const CNodePtr &node, size_t index);
std::string GetOutputName(const CNodePtr &node, size_t index);
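
Qualifying everything as ::ge:: pins name lookup to the global GE namespace; inside mindspore, a plain ge:: could bind to a nested namespace instead. An illustrative collision (the nested namespace here is hypothetical):

namespace ge { struct Operator {}; }  // the real GE namespace
namespace mindspore {
namespace ge { struct Operator {}; }  // any nested ge would shadow the global one
ge::Operator inner;    // resolves to mindspore::ge::Operator
::ge::Operator outer;  // always the global type the tiling adapter needs
}  // namespace mindspore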

View File

@ -103,7 +103,7 @@ void DynamicKernel::InferShape() {
tuple_elements->set_value(out_tensor);
}
}
AnfAlgo::AddArgList(&args_spec_list, cnode_input, real_input, i);
(void)AnfAlgo::AddArgList(&args_spec_list, cnode_input, real_input, i);
}
auto eval_result = opt::CppInferShape(primitive, args_spec_list);
cnode->set_abstract(eval_result);

View File

@ -164,8 +164,7 @@ class DeviceContext {
}
MS_EXCEPTION_IF_NULL(kernel_info);
auto runtime_cache = kernel_info->runtime_cache();
MS_EXCEPTION_IF_NULL(runtime_cache);
runtime_cache->set_valid();
runtime_cache.runtime_cache().set_valid();
}
}

View File

@ -28,8 +28,21 @@
#include "ir/func_graph.h"
#include "ir/primitive.h"
#include "utils/ms_context.h"
#include "utils/anf_utils.h"
namespace mindspore {
const AbstractBasePtr &AnfNode::abstract() const {
// cppcheck-suppress unreadVariable
auto lock = AnfUtils::GetAbstractLock(this);
return abstract_;
}
void AnfNode::set_abstract(const AbstractBasePtr &abs) {
// cppcheck-suppress unreadVariable
auto lock = AnfUtils::GetAbstractLock(this);
abstract_ = abs;
}
// namespace to support intermediate representation definition
CNode::CNode(const std::vector<AnfNodePtr> &inputs, const FuncGraphPtr &func_graph)
: AnfNode(func_graph),
@ -574,9 +587,8 @@ std::string GetCNodeTarget(const AnfNodePtr &node) {
auto kernel_info = node->kernel_info();
if (kernel_info != nullptr) {
auto runtime_cache = kernel_info->runtime_cache();
MS_EXCEPTION_IF_NULL(runtime_cache);
if (runtime_cache->is_valid()) {
auto tmp_target = runtime_cache->device_target();
if (runtime_cache.runtime_cache().is_valid()) {
auto tmp_target = runtime_cache.runtime_cache().device_target();
if (!tmp_target.empty()) {
return tmp_target;
}
@ -595,9 +607,8 @@ std::string GetCNodeTarget(const AnfNodePtr &node) {
if (kernel_info != nullptr) {
auto runtime_cache = kernel_info->runtime_cache();
MS_EXCEPTION_IF_NULL(runtime_cache);
if (runtime_cache->is_valid()) {
runtime_cache->set_device_target(target);
if (runtime_cache.runtime_cache().is_valid()) {
runtime_cache.runtime_cache().set_device_target(target);
}
}
return target;
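
The per-node mutex behind these accessors has to be recursive: call sites updated in this commit already hold the node's lock when they store a result, and set_abstract() then locks the same node again on the same thread. A sketch mirroring the tail of InferShape (PublishResult is a hypothetical helper):

void PublishResult(const CNodePtr &node, const AbstractBasePtr &eval_result) {
  // cppcheck-suppress unreadVariable
  auto lock = AnfUtils::GetAbstractLock(node.get());  // first acquisition on this thread
  node->set_abstract(eval_result);                    // locks the same node again internally; safe, the mutex is recursive
}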

View File

@ -178,12 +178,12 @@ class MS_CORE_API AnfNode : public Base {
/// \brief Obtain the inferred abstract value of this AnfNode.
///
/// \return The inferred abstract value.
const AbstractBasePtr &abstract() const { return abstract_; }
const AbstractBasePtr &abstract() const;
/// \brief Set the abstract value of this AnfNode.
///
/// \param[in] abs New abstract value.
void set_abstract(const AbstractBasePtr &abs) { abstract_ = abs; }
void set_abstract(const AbstractBasePtr &abs);
/// \brief Obtain the intermediate abstract value of this AnfNode.
///

View File

@ -24,12 +24,21 @@
#include "ir/visitor.h"
#include "ir/func_graph.h"
#include "base/core_ops.h"
#include "utils/anf_utils.h"
namespace mindspore {
// namespace to support intermediate representation definition
// Methods of AnfNode
TypePtr AnfNode::Type() const { return (abstract_ == nullptr) ? nullptr : abstract_->BuildType(); }
BaseShapePtr AnfNode::Shape() const { return (abstract_ == nullptr) ? nullptr : abstract_->BuildShape(); }
TypePtr AnfNode::Type() const {
// cppcheck-suppress unreadVariable
auto lock = AnfUtils::GetAbstractLock(this);
return (abstract_ == nullptr) ? nullptr : abstract_->BuildType();
}
BaseShapePtr AnfNode::Shape() const {
// cppcheck-suppress unreadVariable
auto lock = AnfUtils::GetAbstractLock(this);
return (abstract_ == nullptr) ? nullptr : abstract_->BuildShape();
}
std::string AnfNode::ToString() const {
return mindspore::label_manage::Label(const_cast<AnfNode *>(this)->shared_from_base<AnfNode>()->debug_info());

View File

@ -68,13 +68,26 @@ class RuntimeCache {
// Interface for device kernel program information.
class KernelInfoDevice {
public:
class RuntimeCacheScope {
public:
RuntimeCacheScope(RuntimeCache &base, std::mutex &mu) : runtime_cache_(base), mu_(mu) { mu_.lock(); }
RuntimeCacheScope(const RuntimeCacheScope &other) = delete;
RuntimeCacheScope operator=(const RuntimeCacheScope &other) = delete;
~RuntimeCacheScope() { mu_.unlock(); }
RuntimeCache &runtime_cache() { return runtime_cache_; }
private:
RuntimeCache &runtime_cache_;
std::mutex &mu_;
};
// If kernel program was built and build info is set.
virtual bool has_build_info() const = 0;
RuntimeCache *runtime_cache() { return &runtime_cache_; }
RuntimeCacheScope runtime_cache() { return RuntimeCacheScope(runtime_cache_, mu_); }
private:
RuntimeCache runtime_cache_;
std::mutex mu_;
};
using KernelInfoDevicePtr = std::shared_ptr<KernelInfoDevice>;
} // namespace mindspore
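
A usage sketch of the new guard (ExampleCaller is hypothetical; the pattern matches the call sites rewritten throughout this commit). runtime_cache() now returns a scope object whose constructor locks mu_ and whose destructor unlocks it, so the cache can only be read or written while the guard is alive:

void ExampleCaller(KernelInfoDevice *kernel_info) {
  auto runtime_cache = kernel_info->runtime_cache();  // constructor locks mu_
  if (runtime_cache.runtime_cache().is_valid()) {
    runtime_cache.runtime_cache().set_device_target("Ascend");
  }
}  // guard destroyed here; destructor unlocks mu_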

View File

@ -15,6 +15,7 @@
*/
#include "utils/anf_utils.h"
#include <map>
#include <string>
#include "base/core_ops.h"
#include "utils/trace_base.h"
@ -23,8 +24,52 @@
namespace mindspore {
namespace {
const PrimitiveSet follow_first_input_prims = {prim::kPrimDepend, prim::kPrimLoad};
class AbstractMutexManager {
public:
static AbstractMutexManager &GetInstance() {
static AbstractMutexManager instance;
return instance;
}
AbstractScope GetAbstractLock(const AnfNode *node) {
std::lock_guard<std::recursive_mutex> lock(mu_);
return AbstractScope(&mu_for_nodes_[node]);
}
private:
std::map<const AnfNode *, std::recursive_mutex> mu_for_nodes_;
std::recursive_mutex mu_;
};
} // namespace
AbstractScope::AbstractScope(std::recursive_mutex *mu) {
MS_EXCEPTION_IF_NULL(mu);
mu_ = mu;
mu_->lock();
}
AbstractScope::AbstractScope(AbstractScope &&other) {
mu_ = other.mu_;
other.mu_ = nullptr;
}
AbstractScope &AbstractScope::operator=(AbstractScope &&other) {
// Release any lock this scope still holds before taking over the other one's mutex.
if (mu_ != nullptr) {
mu_->unlock();
}
mu_ = other.mu_;
other.mu_ = nullptr;
return *this;
}
AbstractScope::~AbstractScope() {
if (mu_ != nullptr) {
mu_->unlock();
}
}
AbstractScope AnfUtils::GetAbstractLock(const AnfNode *node) {
return AbstractMutexManager::GetInstance().GetAbstractLock(node);
}
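
Two levels of locking are at work here: the manager's own mu_ only protects the node-to-mutex map during lookup (default-constructing an entry on first use), while the returned AbstractScope owns that node's recursive_mutex. Recursion matters because a thread holding a node's lock may re-enter it through the locked accessors. Illustrative only (TouchNode is hypothetical):

void TouchNode(const AnfNode *node) {
  auto outer = AnfUtils::GetAbstractLock(node);  // per-node recursive lock, first acquisition
  auto type = node->Type();                      // Type() re-locks the same mutex; fine, it is recursive
}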
bool AnfUtils::IsDimUnknown(const abstract::ShapePtr &shape) {
MS_EXCEPTION_IF_NULL(shape);
return std::any_of(shape->shape().begin(), shape->shape().end(), [](int64_t s) { return s < -1; });
@ -112,20 +157,18 @@ bool AnfUtils::IsRealKernel(const AnfNodePtr &node) {
auto kernel_info = cnode->kernel_info();
if (kernel_info) {
auto runtime_cache = kernel_info->runtime_cache();
MS_EXCEPTION_IF_NULL(runtime_cache);
if (runtime_cache->is_real_kernel() != CacheBool::UNCACHED) {
return (runtime_cache->is_real_kernel() == CacheBool::TRUE);
if (runtime_cache.runtime_cache().is_real_kernel() != CacheBool::UNCACHED) {
return (runtime_cache.runtime_cache().is_real_kernel() == CacheBool::TRUE);
}
}
bool res = !IsOneOfPrimitive(cnode->input(kAnfPrimitiveIndex), virtual_prims);
if (kernel_info) {
auto runtime_cache = kernel_info->runtime_cache();
MS_EXCEPTION_IF_NULL(runtime_cache);
if (res) {
runtime_cache->set_real_kernel(CacheBool::TRUE);
runtime_cache.runtime_cache().set_real_kernel(CacheBool::TRUE);
} else {
runtime_cache->set_real_kernel(CacheBool::FALSE);
runtime_cache.runtime_cache().set_real_kernel(CacheBool::FALSE);
}
}
@ -175,10 +218,15 @@ size_t AnfUtils::GetInputTensorNum(const AnfNodePtr &node) {
MS_LOG(EXCEPTION) << "Only cnode has real input, but this anf is " << node->DebugString()
<< trace::DumpSourceLines(node);
}
{
// cppcheck-suppress unreadVariable
auto lock = AnfUtils::GetAbstractLock(node.get());
ssize_t input_tensor_num = cnode->input_tensor_num();
if (input_tensor_num >= 0) {
return static_cast<size_t>(input_tensor_num);
}
}
size_t input_num = cnode->inputs().size();
if (input_num == 0) {
MS_LOG(EXCEPTION) << "Cnode inputs size can't be zero" << trace::DumpSourceLines(node);
@ -191,6 +239,8 @@ size_t AnfUtils::GetInputTensorNum(const AnfNodePtr &node) {
auto &inputs = cnode->inputs();
// Search monad inputs, backward.
for (auto iter = inputs.rbegin(); iter != inputs.rend(); ++iter) {
// cppcheck-suppress unreadVariable
auto lock = AnfUtils::GetAbstractLock(node.get());
if (!HasAbstractMonad(*iter)) {
// Stop count if we encounter a non-monad input.
break;
@ -198,6 +248,8 @@ size_t AnfUtils::GetInputTensorNum(const AnfNodePtr &node) {
--input_num;
}
}
// cppcheck-suppress unreadVariable
auto lock = AnfUtils::GetAbstractLock(node.get());
cnode->set_input_tensor_num(static_cast<ssize_t>(input_num));
return input_num;
}
@ -207,8 +259,8 @@ size_t AnfUtils::GetOutputTensorNum(const AnfNodePtr &node) {
auto kernel_info = node->kernel_info();
if (kernel_info) {
auto runtime_cache = kernel_info->runtime_cache();
if (runtime_cache->is_valid()) {
ssize_t output_tensor_num = runtime_cache->output_tensor_num();
if (runtime_cache.runtime_cache().is_valid()) {
ssize_t output_tensor_num = runtime_cache.runtime_cache().output_tensor_num();
if (output_tensor_num >= 0) {
return static_cast<size_t>(output_tensor_num);
}
@ -231,8 +283,8 @@ size_t AnfUtils::GetOutputTensorNum(const AnfNodePtr &node) {
if (kernel_info) {
auto runtime_cache = kernel_info->runtime_cache();
if (runtime_cache->is_valid()) {
runtime_cache->set_output_tensor_num(static_cast<ssize_t>(res));
if (runtime_cache.runtime_cache().is_valid()) {
runtime_cache.runtime_cache().set_output_tensor_num(static_cast<ssize_t>(res));
}
}
return res;

View File

@ -25,6 +25,19 @@
#include "ir/primitive.h"
namespace mindspore {
class AbstractScope {
public:
explicit AbstractScope(std::recursive_mutex *mu);
AbstractScope(const AbstractScope &other) = delete;
AbstractScope operator=(const AbstractScope &other) = delete;
AbstractScope(AbstractScope &&other);
AbstractScope &operator=(AbstractScope &&other);
~AbstractScope();
private:
std::recursive_mutex *mu_;
};
class AnfUtils {
public:
static bool IsDimUnknown(const abstract::ShapePtr &shape);
@ -52,6 +65,7 @@ class AnfUtils {
static void SetDumpFlag(const AnfNodePtr &node);
// Get dump flag from CNode's primitive.
static bool GetDumpFlag(const AnfNodePtr &node);
static AbstractScope GetAbstractLock(const AnfNode *node);
};
} // namespace mindspore
#endif // MINDSPORE_CORE_UTILS_ANF_UTILS_H_
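
With the copy operations deleted and the moved-from guard disarmed by nulling mu_, exactly one AbstractScope is ever responsible for the unlock. A move sketch (MoveExample is a hypothetical helper):

void MoveExample(const AnfNode *node) {
  AbstractScope a = AnfUtils::GetAbstractLock(node);
  AbstractScope b = std::move(a);  // 'a' gives up mu_; only 'b' unlocks at end of scope
}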

View File

@ -182,6 +182,13 @@ file(GLOB_RECURSE MINDSPORE_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR}
"../../../mindspore/ccsrc/profiler/device/ascend/*.cc"
"../../../mindspore/ccsrc/profiler/device/profiling.cc"
"../../../mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/adam_fp32.c"
"../../../mindspore/ccsrc/backend/kernel_compiler/kernel.cc"
"../../../mindspore/ccsrc/backend/kernel_compiler/ascend_kernel_mod.cc"
"../../../mindspore/ccsrc/backend/optimizer/common/helper.cc"
"../../../mindspore/ccsrc/runtime/device/ascend/executor/tiling/op_tiling_adapter.cc"
"../../../mindspore/ccsrc/runtime/device/ascend/executor/aicpu_ext_info_handle.cc"
"../../../mindspore/ccsrc/runtime/device/ascend/ge_types_convert.cc"
"../../../mindspore/ccsrc/backend/kernel_compiler/aicpu/aicpu_util.cc"
)
if(ENABLE_SECURITY)
@ -230,6 +237,24 @@ add_dependencies(_ut_ut_obj engine-cache-server graph)
add_executable(ut_tests $<TARGET_OBJECTS:_ut_ut_obj>
$<TARGET_OBJECTS:_ut_mindspore_obj>)
include_directories("${CMAKE_BINARY_DIR}/backend/kernel_compiler/aicpu")
file(GLOB_RECURSE PROTO_IN RELATIVE ${CMAKE_CURRENT_SOURCE_DIR}
"../../../mindspore/ccsrc/backend/kernel_compiler/aicpu/proto/*.proto")
ms_protobuf_generate(PROTOSRCS PROTOHDRS ${PROTO_IN})
file(GLOB_RECURSE PROTO_DUMP RELATIVE ${CMAKE_CURRENT_SOURCE_DIR}
"../../../mindspore/ccsrc/runtime/device/ascend/dump/proto/*.proto")
ms_protobuf_generate(DUMP_PROTOSRCS PROTOHDRS ${PROTO_DUMP})
list(APPEND MINDSPORE_PROTO_LIST ${PROTOSRCS})
list(APPEND MINDSPORE_PROTO_LIST ${PREDICT_PROTOSRCS})
list(APPEND MINDSPORE_PROTO_LIST ${DUMP_PROTOSRCS})
if(MINDSPORE_PROTO_LIST)
add_library(proto_input_ut STATIC ${MINDSPORE_PROTO_LIST})
set_target_properties(proto_input_ut PROPERTIES COMPILE_FLAGS "-Wno-unused-variable")
endif()
if(ENABLE_GE)
if(ENABLE_TRAIN)
target_link_libraries(ut_tests PRIVATE graph ge_runner)

View File

@ -0,0 +1,75 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "register/op_tiling_info.h"
#include "register/op_tiling.h"
namespace optiling {
using std::make_shared;
extern "C" ge::graphStatus OpParaCalculateV2(const ge::Operator &op, OpRunInfoV2 &run_info) {
return ge::GRAPH_SUCCESS;
}
namespace utils {
OpRunInfo::OpRunInfo() {}
OpRunInfo::OpRunInfo(const uint32_t &block_dim, const bool &clear_atomic, const uint64_t &tiling_key) {}
OpRunInfo::OpRunInfo(const OpRunInfo &runinfo) {}
OpRunInfo::OpRunInfo(OpRunInfo &&runinfo) {}
OpRunInfo &OpRunInfo::operator=(const OpRunInfo &runinfo) { return *this; }
OpRunInfo &OpRunInfo::operator=(OpRunInfo &&runinfo) { return *this; }
void OpRunInfo::SetBlockDim(const uint32_t &block_dim) { return; }
uint32_t OpRunInfo::GetBlockDim() const { return 0; }
void OpRunInfo::AddWorkspace(const int64_t &workspace) { return; }
size_t OpRunInfo::GetWorkspaceNum() const { return 0; }
ge::graphStatus OpRunInfo::GetWorkspace(const size_t &idx, int64_t &workspace) const { return ge::GRAPH_SUCCESS; }
void OpRunInfo::GetAllWorkspaces(std::vector<int64_t> &workspaces) const { return; }
void OpRunInfo::SetWorkspaces(const std::vector<int64_t> &workspaces) { return; }
void OpRunInfo::InternelSetTiling(const ByteBuffer &value) { return; }
void OpRunInfo::AddTilingData(const char *_value, size_t _size) { return; }
ByteBuffer &OpRunInfo::GetAllTilingData() {
// Stub: return a static buffer; returning a reference into a local shared_ptr would dangle.
static ByteBuffer tiling_data;
return tiling_data;
}
const ByteBuffer &OpRunInfo::GetAllTilingData() const {
static ByteBuffer tiling_data;
return tiling_data;
}
void OpRunInfo::SetClearAtomic(bool clear_atomic_input) { return; }
bool OpRunInfo::GetClearAtomic() const { return true; }
void OpRunInfo::SetTilingKey(const uint64_t &new_tiling_key) { return; }
uint64_t OpRunInfo::GetTilingKey() const { return 0; }
} // namespace utils
} // namespace optiling
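
These stubs exist so the unit-test sources added above can link the tiling path without the real GE runtime; OpParaCalculateV2 simply reports success. A hedged sketch of exercising it (assuming OpRunInfoV2 aliases optiling::utils::OpRunInfo, as the extern "C" signature suggests, and that ge::Operator is default-constructible):

void TilingStubSmokeTest() {  // hypothetical test helper
  ::ge::Operator op;                // the adapter would normally build this from a CNode
  optiling::utils::OpRunInfo info;  // default-constructed through the stub constructor
  if (optiling::OpParaCalculateV2(op, info) == ::ge::GRAPH_SUCCESS) {
    // always taken with the stub; the run info stays empty
  }
}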

View File

@ -211,3 +211,9 @@ RTS_API rtError_t rtMemGetInfoEx(rtMemInfoType_t memInfoType, size_t *free, size
RTS_API rtError_t rtProfRegisterCtrlCallback(uint32_t moduleId, rtProfCtrlHandle callback) { return RT_ERROR_NONE; }
RTS_API rtError_t rtGetRtCapability(rtFeatureType_t, int32_t, int64_t *) { return RT_ERROR_NONE; }
RTS_API rtError_t rtKernelLaunchWithHandle(void *handle, const void *devFunc, uint32_t blockDim, void *args,
uint32_t argsSize, rtSmDesc_t *smDesc, rtStream_t stream,
const void *kernelInfo) {
return RT_ERROR_NONE;
}