forked from mindspore-Ecosystem/mindspore
!28310 dynamic_kernel_mod
Merge pull request !28310 from TuDouNi/dynamic_shape_stage1
This commit is contained in:
commit
e4438f3028
|
@ -1,5 +1,6 @@
|
|||
file(GLOB_RECURSE KERNEL_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR}
|
||||
"kernel_build_info.cc"
|
||||
"kernel.cc"
|
||||
"kash/*.cc"
|
||||
"common_utils.cc"
|
||||
"oplib/*.cc"
|
||||
|
@ -12,6 +13,7 @@ endif()
|
|||
|
||||
if(ENABLE_D)
|
||||
file(GLOB_RECURSE D_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR}
|
||||
"ascend_kernel_mod.cc"
|
||||
"kernel_query.cc"
|
||||
"tbe/*.cc"
|
||||
"host/*.cc"
|
||||
|
|
|
@ -36,13 +36,12 @@ using HostDynamicKernel = mindspore::device::ascend::HostDynamicKernel;
|
|||
|
||||
namespace mindspore {
|
||||
namespace kernel {
|
||||
AicpuOpKernelMod::AicpuOpKernelMod() : anf_node_(nullptr) {}
|
||||
AicpuOpKernelMod::AicpuOpKernelMod() {}
|
||||
|
||||
AicpuOpKernelMod::~AicpuOpKernelMod() {
|
||||
args_.clear();
|
||||
inputList_.clear();
|
||||
outputList_.clear();
|
||||
anf_node_ = nullptr;
|
||||
input_list_.clear();
|
||||
output_list_.clear();
|
||||
input_size_list_.clear();
|
||||
output_size_list_.clear();
|
||||
workspace_size_list_.clear();
|
||||
|
@ -55,9 +54,9 @@ void AicpuOpKernelMod::SetOutputSizeList(const std::vector<size_t> &size_list) {
|
|||
const std::vector<size_t> &AicpuOpKernelMod::GetOutputSizeList() const { return output_size_list_; }
|
||||
void AicpuOpKernelMod::SetWorkspaceSizeList(const std::vector<size_t> &size_list) { workspace_size_list_ = size_list; }
|
||||
const std::vector<size_t> &AicpuOpKernelMod::GetWorkspaceSizeList() const { return workspace_size_list_; }
|
||||
void AicpuOpKernelMod::SetInputList(const std::vector<int64_t> &inputList) { inputList_ = inputList; }
|
||||
void AicpuOpKernelMod::SetOutputList(const std::vector<int64_t> &outputList) { outputList_ = outputList; }
|
||||
void AicpuOpKernelMod::SetNodeDef(const std::string &nodeDef) { (void)node_def_str_.assign(nodeDef); }
|
||||
void AicpuOpKernelMod::SetInputList(const std::vector<int64_t> &input_list) { input_list_ = input_list; }
|
||||
void AicpuOpKernelMod::SetOutputList(const std::vector<int64_t> &output_list) { output_list_ = output_list; }
|
||||
void AicpuOpKernelMod::SetNodeDef(const std::string &node_def) { (void)node_def_str_.assign(node_def); }
|
||||
void AicpuOpKernelMod::SetExtInfo(const std::string &ext_info) { ext_info_ = ext_info; }
|
||||
void AicpuOpKernelMod::SetNodeName(const std::string &node_name) { node_name_ = node_name; }
|
||||
void AicpuOpKernelMod::SetCustSo(const std::string &cust_so) {
|
||||
|
@ -85,11 +84,18 @@ void AicpuOpKernelMod::CreateCpuKernelInfo(const std::vector<AddressPtr> &inputs
|
|||
node_so_ = kLibAicpuKernelSoName;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
if (kCpuKernelBaseOps.find(node_name_) == kCpuKernelBaseOps.end()) {
|
||||
node_name_ = kCpuRunApi;
|
||||
}
|
||||
} else if (kCpuKernelBaseOps.find(node_name_) == kCpuKernelBaseOps.end()) {
|
||||
node_name_ = kCpuRunApi;
|
||||
}
|
||||
|
||||
if (node_name_ == kTopK) {
|
||||
node_name_ = kTopKV2;
|
||||
}
|
||||
|
||||
if (node_name_ == kStack) {
|
||||
node_name_ = kPack;
|
||||
}
|
||||
|
||||
// InputOutputAddr
|
||||
vector<void *> io_addrs;
|
||||
(void)std::transform(std::begin(inputs), std::end(inputs), std::back_inserter(io_addrs),
|
||||
|
@ -120,6 +126,8 @@ void AicpuOpKernelMod::CreateCpuKernelInfo(const std::vector<AddressPtr> &inputs
|
|||
aicpu_param_head.extInfoAddr = 0;
|
||||
} else {
|
||||
MS_LOG(INFO) << "Dynamic Kernel Ext Info size:" << ext_info_.size();
|
||||
aicpu_param_head.extInfoLength = SizeToUint(ext_info_.size());
|
||||
aicpu_param_head.extInfoAddr = reinterpret_cast<uint64_t>(ext_info_addr_dev_);
|
||||
}
|
||||
|
||||
args_.clear();
|
||||
|
@ -162,6 +170,8 @@ bool AicpuOpKernelMod::Launch(const std::vector<AddressPtr> &inputs, const std::
|
|||
}
|
||||
MS_LOG(INFO) << "Aicpu launch, node_so_:" << node_so_ << ", node name:" << node_name_
|
||||
<< ", args_size:" << args_.length();
|
||||
// cppcheck-suppress unreadVariable
|
||||
auto lock = AscendKernelMod::LockRuntime();
|
||||
if (rtCpuKernelLaunchWithFlag(reinterpret_cast<const void *>(node_so_.c_str()),
|
||||
reinterpret_cast<const void *>(node_name_.c_str()), 1,
|
||||
reinterpret_cast<const void *>(args_.data()), static_cast<uint32_t>(args_.length()),
|
||||
|
|
|
@ -25,6 +25,8 @@ namespace kernel {
|
|||
class AicpuOpKernelMod : public AscendKernelMod {
|
||||
public:
|
||||
AicpuOpKernelMod();
|
||||
explicit AicpuOpKernelMod(const AnfNodePtr &anf_node_ptr) : AscendKernelMod(anf_node_ptr) {}
|
||||
|
||||
~AicpuOpKernelMod() override;
|
||||
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
|
||||
const std::vector<AddressPtr> &outputs, void *stream_ptr) override;
|
||||
|
@ -33,10 +35,10 @@ class AicpuOpKernelMod : public AscendKernelMod {
|
|||
const std::vector<AddressPtr> &outputs, uint32_t stream_id) override;
|
||||
device::DynamicKernelPtr GenDynamicKernel(const CNodePtr &cnode_ptr, void *stream_ptr) override;
|
||||
|
||||
void SetInputList(const std::vector<int64_t> &inputList);
|
||||
void SetOutputList(const std::vector<int64_t> &outputList);
|
||||
void SetInputList(const std::vector<int64_t> &input_list);
|
||||
void SetOutputList(const std::vector<int64_t> &output_list);
|
||||
void SetAnfNode(const AnfNodePtr &anf_node);
|
||||
void SetNodeDef(const std::string &nodeDef);
|
||||
void SetNodeDef(const std::string &node_def);
|
||||
void SetExtInfo(const std::string &ext_info);
|
||||
void SetNodeName(const std::string &node_name);
|
||||
void SetCustSo(const std::string &cust_so);
|
||||
|
@ -56,16 +58,18 @@ class AicpuOpKernelMod : public AscendKernelMod {
|
|||
const std::vector<size_t> &GetOutputSizeList() const override;
|
||||
const std::vector<size_t> &GetWorkspaceSizeList() const override;
|
||||
|
||||
private:
|
||||
bool cust_kernel_{false};
|
||||
protected:
|
||||
std::string args_;
|
||||
std::string node_def_str_;
|
||||
std::string ext_info_;
|
||||
std::string node_name_;
|
||||
std::string node_so_;
|
||||
std::string ext_info_;
|
||||
std::vector<int64_t> inputList_;
|
||||
std::vector<int64_t> outputList_;
|
||||
AnfNodePtr anf_node_;
|
||||
bool cust_kernel_{false};
|
||||
std::string node_def_str_;
|
||||
void *ext_info_addr_dev_ = nullptr;
|
||||
|
||||
private:
|
||||
std::vector<int64_t> input_list_;
|
||||
std::vector<int64_t> output_list_;
|
||||
|
||||
std::vector<size_t> input_size_list_;
|
||||
std::vector<size_t> output_size_list_;
|
||||
|
|
|
@ -0,0 +1,231 @@
|
|||
/**
|
||||
* Copyright 2021 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "backend/kernel_compiler/aicpu/dynamic_aicpu_kernel_mod.h"
|
||||
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
#include <string>
|
||||
#include <algorithm>
|
||||
#include "runtime/mem.h"
|
||||
#include "acl/acl_rt.h"
|
||||
#include "utils/convert_utils.h"
|
||||
#include "backend/kernel_compiler/aicpu/aicpu_util.h"
|
||||
#include "utils/ms_context.h"
|
||||
#include "runtime/device/kernel_runtime.h"
|
||||
#include "runtime/kernel.h"
|
||||
#include "utils/utils.h"
|
||||
#include "backend/session/anf_runtime_algorithm.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace kernel {
|
||||
// Constructs the dynamic-shape AICPU kernel mod for `anf_node_ptr`.
// The unknown-shape type starts as DEPEND_IN_SHAPE and is switched to DEPEND_COMPUTE when the node is a CNode whose
// op name is listed in kComputeDepend.
DynamicAicpuOpKernelMod::DynamicAicpuOpKernelMod(const AnfNodePtr &anf_node_ptr) : AicpuOpKernelMod(anf_node_ptr) {
  // Raise a framework exception instead of dereferencing a null node below.
  MS_EXCEPTION_IF_NULL(anf_node_ptr);
  unknow_type_ = device::ascend::UnknowShapeOpType::DEPEND_IN_SHAPE;
  auto cnode = anf_node_ptr->cast<CNodePtr>();
  if (cnode == nullptr) {
    // Not a CNode: keep the default type.
    return;
  }
  auto op_name = AnfAlgo::GetCNodeName(cnode);
  if (kComputeDepend.find(op_name) != kComputeDepend.end()) {
    unknow_type_ = device::ascend::UnknowShapeOpType::DEPEND_COMPUTE;
  }
}
|
||||
|
||||
// Releases the device buffer that holds the ext info, if one was allocated.
// A failed rtFree is only logged; nothing is thrown from the destructor.
DynamicAicpuOpKernelMod::~DynamicAicpuOpKernelMod() {
  if (ext_info_addr_dev_ != nullptr) {
    // free dev ptr
    const auto free_status = rtFree(ext_info_addr_dev_);
    if (free_status != RT_ERROR_NONE) {
      MS_LOG(ERROR) << "rtFree failed";
    }
  }
}
|
||||
|
||||
void DynamicAicpuOpKernelMod::InferOp() {
|
||||
auto node = anf_node_.lock();
|
||||
MS_EXCEPTION_IF_NULL(node);
|
||||
if (!AnfAlgo::IsDynamicShape(node)) {
|
||||
MS_LOG(EXCEPTION) << "The node is not dynamic shape.";
|
||||
}
|
||||
KernelMod::InferShape();
|
||||
}
|
||||
|
||||
void DynamicAicpuOpKernelMod::InitOp() {
|
||||
auto node = anf_node_.lock();
|
||||
MS_EXCEPTION_IF_NULL(node);
|
||||
auto cnode = node->cast<CNodePtr>();
|
||||
MS_EXCEPTION_IF_NULL(cnode);
|
||||
if (!AnfAlgo::IsDynamicShape(cnode)) {
|
||||
MS_LOG(EXCEPTION) << "The node is not dynamic shape: " << cnode->fullname_with_scope();
|
||||
}
|
||||
|
||||
MS_LOG(INFO) << "UpdateExtInfo of " << cnode->fullname_with_scope() << " start";
|
||||
auto input_num = AnfAlgo::GetInputTensorNum(cnode);
|
||||
auto output_num = AnfAlgo::GetOutputTensorNum(cnode);
|
||||
if (input_num == 0 && output_num == 0) {
|
||||
MS_LOG(INFO) << "Node:" << cnode->fullname_with_scope() << " no need to update output shape";
|
||||
return;
|
||||
}
|
||||
|
||||
// Parse aicpu ext info
|
||||
ext_info_handler_ = std::make_shared<device::ascend::AicpuExtInfoHandler>(
|
||||
cnode->fullname_with_scope(), static_cast<uint32_t>(input_num), static_cast<uint32_t>(output_num), unknow_type_);
|
||||
MS_EXCEPTION_IF_NULL(ext_info_handler_);
|
||||
if (!ext_info_handler_->Parse(ext_info_)) {
|
||||
MS_LOG(EXCEPTION) << "Parse AiCpu ext_info_handler failed";
|
||||
}
|
||||
|
||||
if (ext_info_.empty()) {
|
||||
MS_LOG(INFO) << "No need to copy to device, ext_info_ is empty. ";
|
||||
return;
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < input_num; ++i) {
|
||||
if (!ext_info_handler_->UpdateInputShapeAndType(i, NOT_NULL(cnode))) {
|
||||
MS_LOG(EXCEPTION) << "Update input shape failed, cnode:" << cnode->fullname_with_scope() << " input:" << i;
|
||||
}
|
||||
}
|
||||
|
||||
if (unknow_type_ != device::ascend::UnknowShapeOpType::DEPEND_COMPUTE) {
|
||||
for (size_t i = 0; i < output_num; ++i) {
|
||||
if (!ext_info_handler_->UpdateOutputShapeAndType(i, NOT_NULL(cnode))) {
|
||||
MS_LOG(EXCEPTION) << "Update output shape failed, cnode:" << cnode->fullname_with_scope() << " output:" << i;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void DynamicAicpuOpKernelMod::AllocateExtInfoDeviceAddr(const CNodePtr &cnode) {
|
||||
MS_EXCEPTION_IF_NULL(cnode);
|
||||
if (ext_info_addr_dev_ != nullptr) {
|
||||
return;
|
||||
}
|
||||
// Allocate ext info addr in device
|
||||
if (ext_info_.size() != 0) {
|
||||
auto ret = rtMalloc(&ext_info_addr_dev_, ext_info_.size(), RT_MEMORY_HBM);
|
||||
if (ret != RT_ERROR_NONE) {
|
||||
MS_LOG(EXCEPTION) << "Call rtMalloc ext_info_addr_dev_ failed. Op name: " << cnode->fullname_with_scope();
|
||||
}
|
||||
}
|
||||
ext_info_size_ = ext_info_.size();
|
||||
}
|
||||
|
||||
bool DynamicAicpuOpKernelMod::Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &,
|
||||
const std::vector<AddressPtr> &outputs, void *stream_ptr) {
|
||||
if (stream_ptr == nullptr) {
|
||||
MS_LOG(ERROR) << "stream_ptr should not be nullptr.";
|
||||
return false;
|
||||
}
|
||||
if (stream_ == nullptr) {
|
||||
stream_ = stream_ptr;
|
||||
}
|
||||
auto node = anf_node_.lock();
|
||||
MS_EXCEPTION_IF_NULL(node);
|
||||
auto cnode = node->cast<CNodePtr>();
|
||||
MS_EXCEPTION_IF_NULL(cnode);
|
||||
MS_LOG(INFO) << "Start launch of node: " << cnode->fullname_with_scope();
|
||||
|
||||
// is dynamic shape
|
||||
if (!AnfAlgo::IsDynamicShape(cnode)) {
|
||||
MS_LOG(EXCEPTION) << "The cnode is not dynamic shape:" << cnode->fullname_with_scope();
|
||||
}
|
||||
|
||||
// copy extinfo to device
|
||||
AllocateExtInfoDeviceAddr(cnode);
|
||||
MS_EXCEPTION_IF_NULL(ext_info_handler_);
|
||||
auto ret = aclrtMemcpy(ext_info_addr_dev_, ext_info_size_, ext_info_handler_->GetExtInfo(),
|
||||
ext_info_handler_->GetExtInfoLen(), ACL_MEMCPY_HOST_TO_DEVICE);
|
||||
if (ret != RT_ERROR_NONE) {
|
||||
MS_LOG(ERROR) << "UpdateExtInfo aclrtMemcpy failed. Node info: " << cnode->fullname_with_scope();
|
||||
return false;
|
||||
}
|
||||
|
||||
AicpuOpKernelMod::CreateCpuKernelInfo(inputs, outputs);
|
||||
MS_LOG(INFO) << "Aicpu launch, node_so_:" << node_so_ << ", node name:" << node_name_
|
||||
<< ", args_size:" << args_.length();
|
||||
// cppcheck-suppress unreadVariable
|
||||
auto lock = AscendKernelMod::LockRuntime();
|
||||
ret = rtCpuKernelLaunchWithFlag(reinterpret_cast<const void *>(node_so_.c_str()),
|
||||
reinterpret_cast<const void *>(node_name_.c_str()), 1,
|
||||
reinterpret_cast<const void *>(args_.data()), static_cast<uint32_t>(args_.length()),
|
||||
nullptr, stream_, RT_KERNEL_DEFAULT);
|
||||
if (ret != RT_ERROR_NONE) {
|
||||
MS_LOG(ERROR) << "Aicpu op launch failed!";
|
||||
return false;
|
||||
}
|
||||
|
||||
if (unknow_type_ == device::ascend::UnknowShapeOpType::DEPEND_COMPUTE) {
|
||||
ret = aclrtMemcpyAsync(ext_info_handler_->GetExtInfo(), ext_info_handler_->GetExtInfoLen(), ext_info_addr_dev_,
|
||||
ext_info_size_, ACL_MEMCPY_DEVICE_TO_HOST, stream_);
|
||||
if (ret != RT_ERROR_NONE) {
|
||||
MS_LOG(ERROR) << "aclrtMemcpyAsync output shape failed. Op name: " << cnode->fullname_with_scope();
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
void DynamicAicpuOpKernelMod::UpdateOp() {
|
||||
auto node = anf_node_.lock();
|
||||
MS_EXCEPTION_IF_NULL(node);
|
||||
auto cnode = node->cast<CNodePtr>();
|
||||
MS_EXCEPTION_IF_NULL(cnode);
|
||||
MS_LOG(INFO) << "Aicpu " << cnode->fullname_with_scope() << " PostExecute";
|
||||
// is dynamic shape
|
||||
if (!AnfAlgo::IsDynamicShape(cnode)) {
|
||||
MS_LOG(EXCEPTION) << "The cnode is not dynamic shape:" << cnode->fullname_with_scope();
|
||||
}
|
||||
|
||||
if (unknow_type_ != device::ascend::UnknowShapeOpType::DEPEND_COMPUTE) {
|
||||
MS_LOG(INFO) << "Node " << node->fullname_with_scope() << " update op skip.";
|
||||
return;
|
||||
}
|
||||
// cppcheck-suppress unreadVariable
|
||||
auto lock = AscendKernelMod::LockRuntime();
|
||||
auto ret = rtStreamSynchronize(stream_);
|
||||
if (ret != RT_ERROR_NONE) {
|
||||
MS_LOG(EXCEPTION) << "Call runtime rtStreamSynchronize failed. Op name: " << cnode->fullname_with_scope();
|
||||
}
|
||||
|
||||
MS_LOG(INFO) << "Update aicpu kernel output shape from ext_info. Op name: " << cnode->fullname_with_scope();
|
||||
UpdateOutputShapeFromExtInfo(cnode);
|
||||
}
|
||||
|
||||
bool DynamicAicpuOpKernelMod::UpdateOutputShapeFromExtInfo(const CNodePtr &cnode) {
|
||||
MS_EXCEPTION_IF_NULL(cnode);
|
||||
MS_LOG(INFO) << "UpdateOutputShapeFromExtInfo start. Op name " << cnode->fullname_with_scope();
|
||||
MS_EXCEPTION_IF_NULL(ext_info_handler_);
|
||||
|
||||
std::vector<TypeId> type_ids;
|
||||
std::vector<std::vector<size_t>> shapes;
|
||||
auto output_num = AnfAlgo::GetOutputTensorNum(cnode);
|
||||
for (size_t i = 0; i < output_num; ++i) {
|
||||
MS_LOG(INFO) << "Get output:" << output_num << " Shape";
|
||||
std::vector<int64_t> shape;
|
||||
TypeId type_id;
|
||||
(void)ext_info_handler_->GetOutputShapeAndType(SizeToUint(i), NOT_NULL(&shape), NOT_NULL(&type_id));
|
||||
type_ids.emplace_back(type_id);
|
||||
std::vector<size_t> size_t_shape;
|
||||
std::transform(shape.begin(), shape.end(), std::back_inserter(size_t_shape), LongToSize);
|
||||
shapes.emplace_back(size_t_shape);
|
||||
}
|
||||
|
||||
AnfAlgo::SetOutputInferTypeAndShape(type_ids, shapes, cnode.get());
|
||||
return true;
|
||||
}
|
||||
} // namespace kernel
|
||||
} // namespace mindspore
|
|
@ -0,0 +1,54 @@
|
|||
/**
|
||||
* Copyright 2021 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_AICPU_DYNAMIC_AICPU_KERNEL_MOD_H_
|
||||
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_AICPU_DYNAMIC_AICPU_KERNEL_MOD_H_
|
||||
#include <vector>
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include "backend/kernel_compiler/aicpu/aicpu_kernel_mod.h"
|
||||
#include "backend/kernel_compiler/aicpu/aicpu_util.h"
|
||||
#include "runtime/device/ascend/executor/aicpu_ext_info_handle.h"
|
||||
namespace mindspore {
|
||||
namespace kernel {
|
||||
class DynamicAicpuOpKernelMod : public AicpuOpKernelMod {
|
||||
public:
|
||||
DynamicAicpuOpKernelMod() : unknow_type_(device::ascend::UnknowShapeOpType::DEPEND_IN_SHAPE) {}
|
||||
explicit DynamicAicpuOpKernelMod(const AnfNodePtr &anf_node_ptr);
|
||||
|
||||
~DynamicAicpuOpKernelMod() override;
|
||||
|
||||
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
|
||||
const std::vector<AddressPtr> &outputs, void *stream_ptr) override;
|
||||
|
||||
void InferOp() override;
|
||||
void InitOp() override;
|
||||
void UpdateOp() override;
|
||||
|
||||
private:
|
||||
void AllocateExtInfoDeviceAddr(const CNodePtr &cnode);
|
||||
bool UpdateOutputShapeFromExtInfo(const CNodePtr &cnode);
|
||||
|
||||
std::shared_ptr<device::ascend::AicpuExtInfoHandler> ext_info_handler_ = nullptr;
|
||||
size_t ext_info_size_ = 0;
|
||||
device::ascend::UnknowShapeOpType unknow_type_;
|
||||
};
|
||||
|
||||
using DynamicAicpuOpKernelModPtr = std::shared_ptr<DynamicAicpuOpKernelMod>;
|
||||
using DynamicAicputOpKernelModPtrList = std::vector<DynamicAicpuOpKernelModPtr>;
|
||||
} // namespace kernel
|
||||
} // namespace mindspore
|
||||
|
||||
#endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_AICPU_DYNAMIC_AICPU_KERNEL_MOD_H_
|
|
@ -0,0 +1,35 @@
|
|||
/**
|
||||
* Copyright 2021 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "backend/kernel_compiler/ascend_kernel_mod.h"
|
||||
#include "runtime/rt.h"
|
||||
namespace mindspore {
|
||||
namespace kernel {
|
||||
void AscendKernelMod::UpdateOp() {
|
||||
MS_EXCEPTION_IF_NULL(stream_);
|
||||
// cppcheck-suppress unreadVariable
|
||||
auto lock = LockRuntime();
|
||||
if (RT_ERROR_NONE != rtStreamSynchronize(stream_)) {
|
||||
MS_LOG(EXCEPTION) << "Call runtime rtStreamSynchronize failed.";
|
||||
}
|
||||
}
|
||||
|
||||
// Locks a single function-local static mutex and returns the guard; the mutex stays locked for as long as the
// caller keeps the returned guard alive.
// std::lock_guard can be neither copied nor moved, so returning it by value needs C++17 guaranteed copy elision.
std::lock_guard<std::mutex> AscendKernelMod::LockRuntime() {
  static std::mutex runtime_mutex;
  return std::lock_guard<std::mutex>{runtime_mutex};
}
|
||||
} // namespace kernel
|
||||
} // namespace mindspore
|
|
@ -31,6 +31,8 @@ namespace mindspore {
|
|||
namespace kernel {
|
||||
class AscendKernelMod : public KernelMod {
|
||||
public:
|
||||
AscendKernelMod() {}
|
||||
explicit AscendKernelMod(const AnfNodePtr &anf_node_ptr) : KernelMod(anf_node_ptr) {}
|
||||
virtual std::vector<TaskInfoPtr> GenTask(const std::vector<AddressPtr> &, const std::vector<AddressPtr> &,
|
||||
const std::vector<AddressPtr> &, uint32_t) = 0;
|
||||
uint32_t block_dim() { return block_dim_; }
|
||||
|
@ -44,6 +46,7 @@ class AscendKernelMod : public KernelMod {
|
|||
return false;
|
||||
#endif
|
||||
}
|
||||
void UpdateOp() override;
|
||||
|
||||
void InitDynamicKernel(const CNodePtr &cnode_ptr, void *stream) {
|
||||
if (dynamic_kernel_ == nullptr) {
|
||||
|
@ -54,6 +57,8 @@ class AscendKernelMod : public KernelMod {
|
|||
}
|
||||
device::DynamicKernelPtr DynamicKernel() const { return dynamic_kernel_; }
|
||||
|
||||
static std::lock_guard<std::mutex> LockRuntime();
|
||||
|
||||
protected:
|
||||
uint32_t block_dim_{1};
|
||||
uint32_t stream_id_{0};
|
||||
|
|
|
@ -66,7 +66,13 @@ HcclKernelFactory &HcclKernelFactory::Get() {
|
|||
|
||||
HcclKernel::HcclKernel()
|
||||
: hccl_count_(0), op_type_(::HcclReduceOp::HCCL_REDUCE_SUM), root_id_(0), src_rank_(0), dest_rank_(0) {}
|
||||
|
||||
// Constructs the kernel bound to `anf_node`, with the same member defaults as the default constructor.
// The node is forwarded to the AscendKernelMod(const AnfNodePtr &) base constructor; it used to be dropped because
// the base was default-constructed.
HcclKernel::HcclKernel(const AnfNodePtr &anf_node)
    : AscendKernelMod(anf_node),
      hccl_count_(0),
      op_type_(::HcclReduceOp::HCCL_REDUCE_SUM),
      root_id_(0),
      src_rank_(0),
      dest_rank_(0) {}
|
||||
HcclKernel::~HcclKernel() {
|
||||
hccl_kernel_input_shape_list_.clear();
|
||||
hccl_kernel_output_shape_list_.clear();
|
||||
|
@ -294,5 +300,99 @@ device::DynamicKernelPtr HcclKernel::GenDynamicKernel(const CNodePtr &cnode_ptr,
|
|||
hccl_type, input_data_addr, output_data_addr, hccl_count_, data_type, op_type_, root_id_, stream_ptr, cnode_ptr);
|
||||
return executor;
|
||||
}
|
||||
|
||||
void HcclKernel::InferOp() {
|
||||
if (AnfAlgo::IsDynamicShape(anf_node_.lock())) {
|
||||
KernelMod::InferShape();
|
||||
}
|
||||
}
|
||||
|
||||
bool HcclKernel::Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &,
|
||||
const std::vector<AddressPtr> &outputs, void *stream_ptr) {
|
||||
auto node = anf_node_.lock();
|
||||
MS_EXCEPTION_IF_NULL(node);
|
||||
if (!node->isa<CNode>()) {
|
||||
MS_LOG(EXCEPTION) << "anfnode is not a cnode";
|
||||
}
|
||||
auto cnode = node->cast<CNodePtr>();
|
||||
MS_EXCEPTION_IF_NULL(cnode);
|
||||
|
||||
if (inputs.empty() && outputs.empty()) {
|
||||
MS_LOG(ERROR) << "Hccl kernel input or output is empty";
|
||||
return false;
|
||||
}
|
||||
if (hccl_data_type_list_.empty()) {
|
||||
MS_LOG(ERROR) << "Hccl data type list is empty";
|
||||
return false;
|
||||
}
|
||||
|
||||
MS_EXCEPTION_IF_NULL(stream_ptr);
|
||||
|
||||
MS_LOG(INFO) << "Start Execute: " << cnode->DebugString();
|
||||
std::string hccl_type = MsOpNameToHcomOpType(AnfAlgo::GetCNodeName(anf_node_.lock()));
|
||||
HcclDataType data_type = hccl_data_type_list_[0];
|
||||
|
||||
::HcomOperation op_info;
|
||||
op_info.hcclType = hccl_type;
|
||||
op_info.inputPtr = inputs[0]->addr;
|
||||
op_info.outputPtr = outputs[0]->addr;
|
||||
op_info.dataType = static_cast<HcclDataType>(data_type);
|
||||
op_info.opType = static_cast<HcclReduceOp>(op_type_);
|
||||
op_info.root = IntToUint(root_id_);
|
||||
op_info.count = hccl_count_;
|
||||
|
||||
auto callback = [this](HcclResult status) {
|
||||
if (status != HCCL_SUCCESS) {
|
||||
MS_LOG(ERROR) << "HcomExcutorInitialize failed, ret:" << status;
|
||||
}
|
||||
std::lock_guard<std::mutex> lock(this->hccl_mutex_);
|
||||
this->cond_.notify_all();
|
||||
MS_LOG(INFO) << "hccl callback success.";
|
||||
};
|
||||
|
||||
auto hccl_ret = hccl::HcclAdapter::GetInstance().HcclExecEnqueueOp(op_info, callback);
|
||||
if (hccl_ret != HCCL_SUCCESS) {
|
||||
MS_LOG(EXCEPTION) << "Call EnqueueHcomOperation failed, node info: " << cnode->DebugString();
|
||||
return false;
|
||||
}
|
||||
|
||||
std::unique_lock<std::mutex> ulock(hccl_mutex_);
|
||||
cond_.wait(ulock);
|
||||
MS_LOG(INFO) << "Execute " << cnode->DebugString() << " success";
|
||||
return true;
|
||||
}
|
||||
|
||||
void HcclKernel::InitOp() {
|
||||
auto node = anf_node_.lock();
|
||||
MS_EXCEPTION_IF_NULL(node);
|
||||
if (!node->isa<CNode>()) {
|
||||
MS_LOG(EXCEPTION) << "anfnode is not a cnode";
|
||||
}
|
||||
auto cnode = node->cast<CNodePtr>();
|
||||
MS_EXCEPTION_IF_NULL(cnode);
|
||||
|
||||
if (!AnfAlgo::IsDynamicShape(cnode)) {
|
||||
MS_LOG(DEBUG) << "The node is not dynamic shape: " << cnode->fullname_with_scope();
|
||||
return;
|
||||
}
|
||||
|
||||
MS_LOG(INFO) << "Start to InitOp. Node info: " << cnode->DebugString();
|
||||
|
||||
std::vector<std::vector<size_t>> hccl_kernel_input_shape_list;
|
||||
if (!HcomUtil::GetKernelInputShape(cnode, &hccl_kernel_input_shape_list)) {
|
||||
MS_LOG(EXCEPTION) << "GetKernelInputShape fail! Node info: " << cnode->DebugString();
|
||||
}
|
||||
|
||||
std::vector<HcclDataType> hccl_data_type_list;
|
||||
if (!HcomUtil::GetHcomDataType(cnode, &hccl_data_type_list)) {
|
||||
MS_LOG(EXCEPTION) << "GetHcomDataType fail! Node info: " << cnode->DebugString();
|
||||
}
|
||||
|
||||
// Update Hccl count
|
||||
if (!HcomUtil::GetHcomCount(cnode, hccl_data_type_list, hccl_kernel_input_shape_list, &hccl_count_)) {
|
||||
MS_LOG(EXCEPTION) << "GetHcomCount fail! Node info: " << cnode->DebugString();
|
||||
}
|
||||
MS_LOG(INFO) << "Update Hccl count:" << hccl_count_;
|
||||
}
|
||||
} // namespace kernel
|
||||
} // namespace mindspore
|
||||
|
|
|
@ -34,6 +34,7 @@ namespace kernel {
|
|||
class HcclKernel : public AscendKernelMod {
|
||||
public:
|
||||
HcclKernel();
|
||||
explicit HcclKernel(const AnfNodePtr &anf_node);
|
||||
~HcclKernel() override;
|
||||
virtual bool Init(const AnfNodePtr &anf_node);
|
||||
const std::vector<size_t> &GetInputSizeList() const override;
|
||||
|
@ -43,6 +44,12 @@ class HcclKernel : public AscendKernelMod {
|
|||
const std::vector<AddressPtr> &outputs, uint32_t stream_id) override;
|
||||
device::DynamicKernelPtr GenDynamicKernel(const CNodePtr &cnode_ptr, void *stream_ptr) override;
|
||||
|
||||
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
|
||||
const std::vector<AddressPtr> &outputs, void *stream_ptr) override;
|
||||
|
||||
void InferOp() override;
|
||||
void InitOp() override;
|
||||
|
||||
protected:
|
||||
std::vector<std::vector<size_t>> hccl_kernel_input_shape_list_;
|
||||
std::vector<std::vector<size_t>> hccl_kernel_output_shape_list_;
|
||||
|
@ -56,9 +63,10 @@ class HcclKernel : public AscendKernelMod {
|
|||
mutable std::vector<size_t> input_size_list_;
|
||||
mutable std::vector<size_t> output_size_list_;
|
||||
mutable std::vector<size_t> workspace_size_list_;
|
||||
AnfNodeWeakPtr anf_node_;
|
||||
std::string op_name_;
|
||||
std::string group_;
|
||||
std::mutex hccl_mutex_;
|
||||
std::condition_variable cond_;
|
||||
};
|
||||
|
||||
using HcclKernelCreater = std::function<std::shared_ptr<HcclKernel>()>;
|
||||
|
|
|
@ -16,6 +16,7 @@
|
|||
|
||||
#include "backend/kernel_compiler/host/dynamic_broadcast_gradient_args_kernel.h"
|
||||
#include "backend/session/anf_runtime_algorithm.h"
|
||||
#include "runtime/device/ascend/ascend_kernel_runtime.h"
|
||||
#include "utils/trace_base.h"
|
||||
|
||||
namespace mindspore {
|
||||
|
@ -195,6 +196,15 @@ void DynamicBroadcastGradientArgsKernel::Execute() {
|
|||
input_shapes[1] = GetInputShape(cnode, 1);
|
||||
auto grad_reduce_idx = CalculateOutput(input_shapes);
|
||||
|
||||
auto runtime_instance = device::KernelRuntimeManager::Instance().GetCurrentKernelRuntime();
|
||||
MS_EXCEPTION_IF_NULL(runtime_instance);
|
||||
// cppcheck-suppress unreadVariable
|
||||
auto lock = AscendKernelMod::LockRuntime();
|
||||
auto ret = runtime_instance->SyncStream();
|
||||
if (!ret) {
|
||||
MS_LOG(EXCEPTION) << "Sync stream error!";
|
||||
}
|
||||
|
||||
auto r0_size = SetOutputValue(cnode, grad_reduce_idx, 0, input_shapes[0].size());
|
||||
auto r1_size = SetOutputValue(cnode, grad_reduce_idx, 1, input_shapes[1].size());
|
||||
|
||||
|
@ -209,5 +219,26 @@ device::DynamicKernelPtr DynamicBroadcastGradientArgsKernelMod::GenDynamicKernel
|
|||
void *stream_ptr) {
|
||||
return std::make_shared<DynamicBroadcastGradientArgsKernel>(stream_ptr, cnode_ptr);
|
||||
}
|
||||
|
||||
bool DynamicBroadcastGradientArgsKernelMod::Launch(const std::vector<AddressPtr> &, const std::vector<AddressPtr> &,
|
||||
const std::vector<AddressPtr> &, void *stream_ptr) {
|
||||
auto node = anf_node_.lock();
|
||||
MS_EXCEPTION_IF_NULL(node);
|
||||
if (!node->isa<CNode>()) {
|
||||
MS_LOG(EXCEPTION) << "anfnode is not a cnode";
|
||||
}
|
||||
auto cnode = node->cast<CNodePtr>();
|
||||
MS_EXCEPTION_IF_NULL(cnode);
|
||||
stream_ = stream_ptr;
|
||||
auto broadcast_grad_kernel = std::make_shared<DynamicBroadcastGradientArgsKernel>(stream_ptr, cnode);
|
||||
try {
|
||||
broadcast_grad_kernel->Execute();
|
||||
} catch (const std::exception &e) {
|
||||
MS_LOG(ERROR) << "DynamicBroadcastGradientArgsKernel Launch failed. node: " << cnode->fullname_with_scope()
|
||||
<< ", Error message is " << e.what();
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
} // namespace kernel
|
||||
} // namespace mindspore
|
||||
|
|
|
@ -36,6 +36,8 @@ class DynamicBroadcastGradientArgsKernelMod : public HostKernelMod {
|
|||
DynamicBroadcastGradientArgsKernelMod() = default;
|
||||
~DynamicBroadcastGradientArgsKernelMod() override = default;
|
||||
device::DynamicKernelPtr GenDynamicKernel(const CNodePtr &cnode_ptr, void *stream_ptr) override;
|
||||
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
|
||||
const std::vector<AddressPtr> &outputs, void *stream_ptr) override;
|
||||
};
|
||||
MS_HOST_REG_KERNEL(DynamicBroadcastGradientArgs, DynamicBroadcastGradientArgsKernelMod);
|
||||
} // namespace kernel
|
||||
|
|
|
@ -114,5 +114,26 @@ void DynamicReshapeKernel::Execute() {
|
|||
device::DynamicKernelPtr DynamicReshapeKernelMod::GenDynamicKernel(const CNodePtr &cnode_ptr, void *stream_ptr) {
|
||||
return std::make_shared<DynamicReshapeKernel>(stream_ptr, cnode_ptr);
|
||||
}
|
||||
|
||||
bool DynamicReshapeKernelMod::Launch(const std::vector<AddressPtr> &, const std::vector<AddressPtr> &,
|
||||
const std::vector<AddressPtr> &, void *stream_ptr) {
|
||||
auto node = anf_node_.lock();
|
||||
MS_EXCEPTION_IF_NULL(node);
|
||||
if (!node->isa<CNode>()) {
|
||||
MS_LOG(EXCEPTION) << "anfnode is not a cnode";
|
||||
}
|
||||
auto cnode = node->cast<CNodePtr>();
|
||||
MS_EXCEPTION_IF_NULL(cnode);
|
||||
stream_ = stream_ptr;
|
||||
auto reshape_kernel = std::make_shared<DynamicReshapeKernel>(stream_ptr, cnode);
|
||||
try {
|
||||
reshape_kernel->Execute();
|
||||
} catch (const std::exception &e) {
|
||||
MS_LOG(ERROR) << "DynamicReshapeKernel Launch failed. node: " << cnode->fullname_with_scope()
|
||||
<< ", Error message is " << e.what();
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
} // namespace kernel
|
||||
} // namespace mindspore
|
||||
|
|
|
@ -35,6 +35,9 @@ class DynamicReshapeKernelMod : public HostKernelMod {
|
|||
DynamicReshapeKernelMod() = default;
|
||||
~DynamicReshapeKernelMod() override = default;
|
||||
device::DynamicKernelPtr GenDynamicKernel(const CNodePtr &cnode_ptr, void *stream_ptr) override;
|
||||
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
|
||||
const std::vector<AddressPtr> &outputs, void *stream_ptr) override;
|
||||
void UpdateOp() override { AscendKernelMod::UpdateOp(); }
|
||||
};
|
||||
MS_HOST_REG_KERNEL(DynamicReshape, DynamicReshapeKernelMod);
|
||||
} // namespace kernel
|
||||
|
|
|
@ -57,6 +57,8 @@ void DynamicShapeKernel::Execute() {
|
|||
} else {
|
||||
auto runtime_instance = device::KernelRuntimeManager::Instance().GetCurrentKernelRuntime();
|
||||
MS_EXCEPTION_IF_NULL(runtime_instance);
|
||||
// cppcheck-suppress unreadVariable
|
||||
auto lock = AscendKernelMod::LockRuntime();
|
||||
auto ret = runtime_instance->SyncStream();
|
||||
if (!ret) {
|
||||
MS_LOG(EXCEPTION) << "Sync stream error!";
|
||||
|
@ -106,5 +108,23 @@ void DynamicShapeKernel::Execute(const std::vector<AddressPtr> &inputs, const st
|
|||
// Creates a DynamicShapeKernel bound to the given stream and node and returns it as a DynamicKernelPtr.
device::DynamicKernelPtr DynamicShapeKernelMod::GenDynamicKernel(const CNodePtr &cnode_ptr, void *stream_ptr) {
  auto shape_kernel = std::make_shared<DynamicShapeKernel>(stream_ptr, cnode_ptr);
  return shape_kernel;
}
|
||||
|
||||
bool DynamicShapeKernelMod::Launch(const std::vector<AddressPtr> &, const std::vector<AddressPtr> &,
|
||||
const std::vector<AddressPtr> &, void *stream_ptr) {
|
||||
auto node = anf_node_.lock();
|
||||
MS_EXCEPTION_IF_NULL(node);
|
||||
auto cnode = node->cast<CNodePtr>();
|
||||
MS_EXCEPTION_IF_NULL(cnode);
|
||||
stream_ = stream_ptr;
|
||||
auto shape_kernel = std::make_shared<DynamicShapeKernel>(stream_ptr, cnode);
|
||||
try {
|
||||
shape_kernel->Execute();
|
||||
} catch (const std::exception &e) {
|
||||
MS_LOG(ERROR) << "DynamicShapeKernelMod Launch failed. node: " << cnode->fullname_with_scope()
|
||||
<< ", Error message is " << e.what();
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
} // namespace kernel
|
||||
} // namespace mindspore
|
||||
|
|
|
@ -38,18 +38,7 @@ class DynamicShapeKernelMod : public HostKernelMod {
|
|||
~DynamicShapeKernelMod() override = default;
|
||||
device::DynamicKernelPtr GenDynamicKernel(const CNodePtr &cnode_ptr, void *stream_ptr) override;
|
||||
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
|
||||
const std::vector<AddressPtr> &outputs, void *stream_ptr) override {
|
||||
if (kernel_ == nullptr) {
|
||||
kernel_ =
|
||||
std::dynamic_pointer_cast<DynamicShapeKernel>(GenDynamicKernel(anf_node_->cast<CNodePtr>(), stream_ptr));
|
||||
kernel_->Initialize();
|
||||
}
|
||||
kernel_->Execute(inputs, outputs);
|
||||
return true;
|
||||
}
|
||||
|
||||
private:
|
||||
std::shared_ptr<DynamicShapeKernel> kernel_;
|
||||
const std::vector<AddressPtr> &outputs, void *stream_ptr) override;
|
||||
};
|
||||
MS_HOST_REG_KERNEL(DynamicShape, DynamicShapeKernelMod);
|
||||
} // namespace kernel
|
||||
|
|
|
@ -77,6 +77,16 @@ bool HostKernelMod::Launch(const std::vector<AddressPtr> &, const std::vector<Ad
|
|||
const std::vector<AddressPtr> &, void *) {
|
||||
return true;
|
||||
}
|
||||
|
||||
void HostKernelMod::InferOp() {
|
||||
auto node = anf_node_.lock();
|
||||
MS_EXCEPTION_IF_NULL(node);
|
||||
if (!AnfAlgo::IsDynamicShape(node)) {
|
||||
MS_LOG(EXCEPTION) << "The node is not dynamic shape.";
|
||||
}
|
||||
KernelMod::InferShape();
|
||||
}
|
||||
|
||||
std::vector<TaskInfoPtr> HostKernelMod::GenTask(const std::vector<AddressPtr> &, const std::vector<AddressPtr> &,
|
||||
const std::vector<AddressPtr> &, uint32_t) {
|
||||
return {};
|
||||
|
|
|
@ -36,9 +36,10 @@ class HostKernelMod : public AscendKernelMod {
|
|||
const std::vector<AddressPtr> &, uint32_t) override;
|
||||
device::DynamicKernelPtr GenDynamicKernel(const CNodePtr &cnode_ptr, void *stream_ptr) override = 0;
|
||||
bool Init(const AnfNodePtr &anf_node);
|
||||
void InferOp() override;
|
||||
void UpdateOp() override {}
|
||||
|
||||
protected:
|
||||
AnfNodePtr anf_node_;
|
||||
std::string op_name_;
|
||||
std::vector<size_t> input_size_list_;
|
||||
std::vector<size_t> output_size_list_;
|
||||
|
|
|
@ -0,0 +1,184 @@
|
|||
/**
|
||||
* Copyright 2021 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "backend/kernel_compiler/kernel.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <stack>
|
||||
#include <utility>
|
||||
#include "utils/ms_context.h"
|
||||
#include "utils/anf_utils.h"
|
||||
#include "utils/ms_device_shape_transfer.h"
|
||||
#include "backend/session/anf_runtime_algorithm.h"
|
||||
#include "backend/optimizer/common/helper.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace kernel {
|
||||
constexpr int64_t kInvalidShape = -2;
|
||||
|
||||
void KernelMod::InferShape() {
|
||||
auto node = anf_node_.lock();
|
||||
MS_EXCEPTION_IF_NULL(node);
|
||||
if (!node->isa<CNode>()) {
|
||||
MS_LOG(EXCEPTION) << "anfnode is not a cnode";
|
||||
}
|
||||
auto cnode = node->cast<CNodePtr>();
|
||||
MS_EXCEPTION_IF_NULL(cnode);
|
||||
MS_LOG(INFO) << "InferShape start, node:" << cnode->fullname_with_scope();
|
||||
GetDepndLists(cnode);
|
||||
auto ret = InferShapeForDefiniteOutputNode(cnode);
|
||||
if (ret) {
|
||||
return;
|
||||
}
|
||||
depend_tensor_map_.clear();
|
||||
auto inputs = cnode->inputs();
|
||||
if (inputs.empty()) {
|
||||
MS_LOG(EXCEPTION) << "Invalid inputs";
|
||||
}
|
||||
auto context = MsContext::GetInstance();
|
||||
MS_EXCEPTION_IF_NULL(context);
|
||||
AbstractBasePtrList args_spec_list;
|
||||
auto primitive = GetValueNode<PrimitivePtr>(inputs[0]);
|
||||
auto input_size = AnfAlgo::GetInputTensorNum(cnode);
|
||||
std::vector<AnfNodePtr> input_nodes;
|
||||
for (size_t i = 0; i < input_size; i++) {
|
||||
auto input_node_with_index = AnfAlgo::GetPrevNodeOutput(cnode, i);
|
||||
auto real_input = input_node_with_index.first;
|
||||
MS_EXCEPTION_IF_NULL(real_input);
|
||||
auto cnode_input = cnode->input(i + 1);
|
||||
MS_EXCEPTION_IF_NULL(cnode_input);
|
||||
InferShapeForNopNode(&real_input);
|
||||
if (depend_list_.find(i) != depend_list_.end()) {
|
||||
auto pre_node_with_index = AnfAlgo::GetPrevNodeOutput(cnode, i);
|
||||
bool skip_nop_node = !context->get_param<bool>(MS_CTX_ENABLE_MINDRT);
|
||||
auto output_addr = AnfAlgo::GetPrevNodeMutableOutputAddr(cnode, i, skip_nop_node);
|
||||
std::vector<int64_t> shapes =
|
||||
trans::GetRuntimePaddingShape(pre_node_with_index.first, pre_node_with_index.second);
|
||||
auto host_type = AnfAlgo::GetOutputInferDataType(pre_node_with_index.first, pre_node_with_index.second);
|
||||
auto out_tensor = std::make_shared<tensor::Tensor>(host_type, shapes);
|
||||
MS_EXCEPTION_IF_NULL(out_tensor);
|
||||
// The second parameter must be false, otherwise the device address cannot be released and allocated, and the
|
||||
// address size will be wrong in the dynamic shape scenario.
|
||||
out_tensor->set_device_address(output_addr, false);
|
||||
auto ret2 = depend_tensor_map_.try_emplace(i, out_tensor);
|
||||
if (!ret2.second) {
|
||||
MS_LOG(EXCEPTION) << "Insert map failed";
|
||||
}
|
||||
out_tensor->data_sync();
|
||||
auto lock = AnfUtils::GetAbstractLock(real_input.get());
|
||||
MS_EXCEPTION_IF_NULL(real_input->abstract());
|
||||
auto real_abs = real_input->abstract()->Clone();
|
||||
if (real_abs->isa<abstract::AbstractTensor>()) {
|
||||
real_abs->set_value(out_tensor);
|
||||
} else if (real_abs->isa<abstract::AbstractTuple>()) {
|
||||
auto tuple_get_item_index = AnfAlgo::GetTupleGetItemOutIndex(cnode_input->cast<CNodePtr>());
|
||||
auto abstract_tuple = real_abs->cast<abstract::AbstractTuplePtr>();
|
||||
MS_EXCEPTION_IF_NULL(abstract_tuple);
|
||||
auto tuple_elements = abstract_tuple->elements()[tuple_get_item_index];
|
||||
tuple_elements->set_value(out_tensor);
|
||||
}
|
||||
real_input->set_abstract(real_abs);
|
||||
}
|
||||
bool is_cnode_input = AnfAlgo::AddArgList(&args_spec_list, cnode_input, real_input, i);
|
||||
if (is_cnode_input) {
|
||||
input_nodes.push_back(cnode_input);
|
||||
} else {
|
||||
input_nodes.push_back(real_input);
|
||||
}
|
||||
}
|
||||
std::vector<AbstractScope> locks;
|
||||
std::transform(input_nodes.begin(), input_nodes.end(), std::back_inserter(locks),
|
||||
[](const AnfNodePtr &input) { return AnfUtils::GetAbstractLock(input.get()); });
|
||||
auto eval_result = opt::CppInferShape(primitive, args_spec_list);
|
||||
locks.clear();
|
||||
// cppcheck-suppress unreadVariable
|
||||
auto lock = AnfUtils::GetAbstractLock(cnode.get());
|
||||
cnode->set_abstract(eval_result);
|
||||
}
|
||||
|
||||
bool KernelMod::InferShapeForDefiniteOutputNode(const CNodePtr &cnode) {
|
||||
MS_EXCEPTION_IF_NULL(cnode);
|
||||
if (!AnfAlgo::CheckPrimitiveType(cnode, prim::kPrimShape)) {
|
||||
return false;
|
||||
}
|
||||
auto input_size = AnfAlgo::GetInputTensorNum(cnode);
|
||||
if (input_size != 1) {
|
||||
MS_LOG(EXCEPTION) << "Node only has one input: " << cnode->fullname_with_scope();
|
||||
}
|
||||
auto cur_shape = dynamic_cast<mindspore::abstract::Shape *>(cnode->Shape().get())->shape();
|
||||
if (std::any_of(cur_shape.begin(), cur_shape.end(), [](int64_t x) { return x == kInvalidShape; })) {
|
||||
return false;
|
||||
}
|
||||
std::vector<int64_t> output_shape = {static_cast<int64_t>(cur_shape.size())};
|
||||
mindspore::abstract::BaseShapePtr shape = std::make_shared<mindspore::abstract::Shape>(output_shape);
|
||||
|
||||
auto lock = AnfUtils::GetAbstractLock(cnode.get());
|
||||
auto abstract = cnode->abstract()->Clone();
|
||||
MS_EXCEPTION_IF_NULL(abstract);
|
||||
abstract->set_shape(shape);
|
||||
cnode->set_abstract(abstract);
|
||||
return true;
|
||||
}
|
||||
|
||||
// Re-infers the shapes along a chain of nop nodes that ends at `*input_node`.
// Does nothing unless `*input_node` is both a nop node and dynamic shape. Otherwise walks upwards through
// input 0 while the predecessor is a nop node, re-pointing `*input_node` to each nop node found, and then
// calls AnfAlgo::InferShape on the collected nodes, last-found node first.
void KernelMod::InferShapeForNopNode(AnfNodePtr *input_node) {
  MS_EXCEPTION_IF_NULL(*input_node);
  const bool needs_infer = opt::IsNopNode(*input_node) && AnfAlgo::IsDynamicShape(*input_node);
  if (!needs_infer) {
    MS_LOG(INFO) << "Input node is not a nop node, no need infer.";
    return;
  }
  MS_LOG(INFO) << "Infer shape for nop node.";

  // Stack of nop nodes: the node closest to the real producer ends up on top.
  std::stack<AnfNodePtr> pending_nops;
  pending_nops.push(*input_node);
  for (;;) {
    const auto prev_node = AnfAlgo::GetPrevNodeOutput(*input_node, 0).first;
    MS_EXCEPTION_IF_NULL(prev_node);
    if (!opt::IsNopNode(prev_node)) {
      break;
    }
    pending_nops.push(prev_node);
    *input_node = prev_node;
  }

  for (; !pending_nops.empty(); pending_nops.pop()) {
    const auto current_nop = pending_nops.top();
    MS_EXCEPTION_IF_NULL(current_nop);
    AnfAlgo::InferShape(current_nop->cast<CNodePtr>());
  }
}
|
||||
|
||||
void KernelMod::GetDepndLists(const CNodePtr &cnode) {
|
||||
MS_EXCEPTION_IF_NULL(cnode);
|
||||
if (depend_list_.size() != 0) {
|
||||
return;
|
||||
}
|
||||
auto ret = abstract::GetDependsFormMap(cnode);
|
||||
if (ret.empty()) {
|
||||
MS_LOG(DEBUG) << "No dynamic_shape_depends found";
|
||||
return;
|
||||
}
|
||||
MS_LOG(INFO) << "Have depends";
|
||||
(void)std::transform(ret.begin(), ret.end(), std::inserter(depend_list_, depend_list_.begin()),
|
||||
[](const int64_t &value) { return static_cast<int>(value); });
|
||||
MS_LOG(INFO) << "Init End";
|
||||
}
|
||||
} // namespace kernel
|
||||
} // namespace mindspore
|
|
@ -18,6 +18,8 @@
|
|||
#include <vector>
|
||||
#include <string>
|
||||
#include <memory>
|
||||
#include <map>
|
||||
#include <set>
|
||||
#include "nlohmann/json.hpp"
|
||||
#include "ir/anf.h"
|
||||
#include "ir/dtype.h"
|
||||
|
@ -180,6 +182,8 @@ struct KernelLaunchInfo {
|
|||
|
||||
class KernelMod {
|
||||
public:
|
||||
KernelMod() {}
|
||||
explicit KernelMod(const AnfNodePtr &anf_node_ptr) : anf_node_(anf_node_ptr) {}
|
||||
virtual const std::vector<size_t> &GetInputSizeList() const = 0;
|
||||
virtual const std::vector<size_t> &GetOutputSizeList() const = 0;
|
||||
virtual const std::vector<size_t> &GetWorkspaceSizeList() const = 0;
|
||||
|
@ -193,6 +197,10 @@ class KernelMod {
|
|||
virtual std::vector<size_t> GenParameters() { return {}; }
|
||||
virtual void ReleaseResource() {}
|
||||
|
||||
virtual void InferOp() {}
|
||||
virtual void InitOp() {}
|
||||
virtual void UpdateOp() {}
|
||||
|
||||
virtual ~KernelMod() = default;
|
||||
void set_unique_name(const std::string &unique_name) { unique_name_ = unique_name; }
|
||||
void set_fullname(const std::string &fullname) { fullname_ = fullname; }
|
||||
|
@ -205,18 +213,29 @@ class KernelMod {
|
|||
const std::vector<AddressPtr> &GetOutputsAddr() { return outputs_addr_; }
|
||||
void SetStream(void *stream) { stream_ = stream; }
|
||||
void *GetStream() const { return stream_; }
|
||||
void SetAtomicCleanNodes(const std::vector<CNodePtr> &atomic_clean_node) { atomic_clean_nodes_ = atomic_clean_node; }
|
||||
|
||||
protected:
|
||||
void InferShape();
|
||||
|
||||
std::string kernel_name_;
|
||||
std::string unique_name_;
|
||||
std::string fullname_;
|
||||
bool is_monad_{false};
|
||||
void *stream_{nullptr};
|
||||
AnfNodeWeakPtr anf_node_;
|
||||
std::map<uint32_t, tensor::TensorPtr> depend_tensor_map_;
|
||||
std::vector<CNodePtr> atomic_clean_nodes_;
|
||||
|
||||
private:
|
||||
void InferShapeForNopNode(AnfNodePtr *input_node);
|
||||
void GetDepndLists(const CNodePtr &cnode);
|
||||
bool InferShapeForDefiniteOutputNode(const CNodePtr &cnode);
|
||||
|
||||
std::vector<AddressPtr> inputs_addr_;
|
||||
std::vector<AddressPtr> workspaces_addr_;
|
||||
std::vector<AddressPtr> outputs_addr_;
|
||||
std::set<uint32_t> depend_list_;
|
||||
};
|
||||
using KernelModPtr = std::shared_ptr<KernelMod>;
|
||||
} // namespace kernel
|
||||
|
|
|
@ -0,0 +1,298 @@
|
|||
/**
|
||||
* Copyright 2021 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "backend/kernel_compiler/tbe/dynamic_tbe_kernel_mod.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <stack>
|
||||
#include "acl/acl_rt.h"
|
||||
#include "utils/ms_context.h"
|
||||
#include "backend/session/anf_runtime_algorithm.h"
|
||||
#include "runtime/device/kernel_runtime.h"
|
||||
#include "backend/optimizer/common/helper.h"
|
||||
#include "framework/common/debug/log.h"
|
||||
#include "utils/log_adapter.h"
|
||||
#include "utils/convert_utils_base.h"
|
||||
#include "runtime/device/kernel_runtime_manager.h"
|
||||
#include "runtime/kernel.h"
|
||||
#include "runtime/mem.h"
|
||||
#include "pipeline/jit/static_analysis/static_analysis.h"
|
||||
#include "runtime/device/ascend/executor/tiling/op_tiling_adapter.h"
|
||||
#include "utils/ms_device_shape_transfer.h"
|
||||
#include "utils/utils.h"
|
||||
#include "register/op_tiling.h"
|
||||
#include "nlohmann/json.hpp"
|
||||
|
||||
namespace mindspore {
|
||||
namespace kernel {
|
||||
using TbeTaskInfoPtr = std::shared_ptr<mindspore::ge::model_runner::TbeTaskInfo>;
|
||||
using tbe::KernelManager;
|
||||
using AddressPtrList = std::vector<mindspore::kernel::AddressPtr>;
|
||||
|
||||
// Binds the kernel pack and node to the base TbeKernelMod and, when the node is a CNode, caches its
// compile info string in op_compile_info_. Raises an MS exception for a null node.
DynamicTbeKernelMod::DynamicTbeKernelMod(KernelPackPtr kernel_pack, const AnfNodePtr &anf_node_ptr)
    : TbeKernelMod(std::move(kernel_pack), anf_node_ptr) {
  MS_EXCEPTION_IF_NULL(anf_node_ptr);
  const auto kernel_node = anf_node_ptr->cast<CNodePtr>();
  if (kernel_node == nullptr) {
    // Not a CNode: there is no compile info to parse.
    return;
  }
  op_compile_info_ = ParseCompileJson(kernel_node);
}
|
||||
|
||||
// Frees the tiling buffer obtained with rtMalloc in InitTilingDataPtr(), if one was allocated.
// The rtFree status is deliberately discarded.
DynamicTbeKernelMod::~DynamicTbeKernelMod() {
  if (tiling_data_ptr_ == nullptr) {
    return;
  }
  (void)rtFree(tiling_data_ptr_);
}
|
||||
|
||||
void DynamicTbeKernelMod::InferOp() {
|
||||
if (AnfAlgo::IsDynamicShape(anf_node_.lock())) {
|
||||
auto node = anf_node_.lock();
|
||||
MS_EXCEPTION_IF_NULL(node);
|
||||
auto cnode = node->cast<CNodePtr>();
|
||||
MS_EXCEPTION_IF_NULL(cnode);
|
||||
need_skip_execute_ = NeedSkipExecute(cnode);
|
||||
if (need_skip_execute_) {
|
||||
std::vector<TypeId> dtypes{AnfAlgo::GetOutputInferDataType(cnode, 0)};
|
||||
AnfAlgo::SetOutputInferTypeAndShape(dtypes, {AnfAlgo::GetInputDeviceShape(cnode, 0)}, cnode.get());
|
||||
} else {
|
||||
KernelMod::InferShape();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void DynamicTbeKernelMod::InitOp() {
|
||||
auto node = anf_node_.lock();
|
||||
MS_EXCEPTION_IF_NULL(node);
|
||||
auto cnode = node->cast<CNodePtr>();
|
||||
MS_EXCEPTION_IF_NULL(cnode);
|
||||
|
||||
if (!AnfAlgo::IsDynamicShape(cnode)) {
|
||||
MS_LOG(EXCEPTION) << "The node is not dynamic shape: " << cnode->fullname_with_scope();
|
||||
}
|
||||
|
||||
if (!atomic_clean_nodes_.empty()) {
|
||||
for (const auto &atomic_clean_node : atomic_clean_nodes_) {
|
||||
AnfAlgo::GetKernelMod(atomic_clean_node)->InitOp();
|
||||
}
|
||||
}
|
||||
|
||||
if (need_skip_execute_) {
|
||||
return;
|
||||
}
|
||||
|
||||
// gen FuncStub
|
||||
if (handle_ == nullptr) {
|
||||
auto func_stub = KernelManager::GenFuncStub(*kernel_pack_, false, &block_dim_, true, &handle_, &origin_key_);
|
||||
if (func_stub != 1) {
|
||||
MS_LOG(EXCEPTION) << "GenFuncStub failed.";
|
||||
}
|
||||
}
|
||||
|
||||
// start compute tiling
|
||||
MS_LOG(INFO) << "Start compute tiling of: " << cnode->fullname_with_scope();
|
||||
optiling::utils::OpRunInfo op_run_info_v2(-1, true, 0);
|
||||
device::tiling::OpTilingCalculateAdapter converter;
|
||||
::ge::ComputeGraphPtr ge_graph = std::make_shared<::ge::ComputeGraph>("default");
|
||||
auto ge_node = converter.AnfNodeToGeNodeAdapter(cnode, &ge_graph, depend_tensor_map_, op_compile_info_);
|
||||
(void)optiling::OpParaCalculateV2(ge_node, op_run_info_v2);
|
||||
|
||||
block_dim_ = op_run_info_v2.GetBlockDim();
|
||||
std::vector<int64_t> workspace_size_list;
|
||||
op_run_info_v2.GetAllWorkspaces(workspace_size_list);
|
||||
tiling_data_ = op_run_info_v2.GetAllTilingData().str();
|
||||
tiling_key_ = op_run_info_v2.GetTilingKey();
|
||||
|
||||
workspace_size_list_.clear();
|
||||
workspace_size_list_.resize(workspace_size_list.size());
|
||||
std::transform(workspace_size_list.begin(), workspace_size_list.end(), workspace_size_list_.begin(),
|
||||
[](int64_t size) { return static_cast<size_t>(size); });
|
||||
}
|
||||
|
||||
std::string DynamicTbeKernelMod::ParseCompileJson(const CNodePtr &cnode) {
|
||||
MS_EXCEPTION_IF_NULL(cnode);
|
||||
|
||||
bool get_flag = true;
|
||||
std::string op_compile_info = "";
|
||||
TbeUtils::GetCompileInfo(cnode, &op_compile_info, &get_flag);
|
||||
if (!get_flag) {
|
||||
MS_LOG(EXCEPTION) << "Get compile_info failed. The compile result of [" << cnode->fullname_with_scope()
|
||||
<< "] maybe not in the json file(kernel_meta/) or the file had been deleted.";
|
||||
}
|
||||
MS_LOG(INFO) << "Node: " << cnode->fullname_with_scope() << " get compile_info: " << op_compile_info;
|
||||
return op_compile_info;
|
||||
}
|
||||
|
||||
void DynamicTbeKernelMod::InitTilingDataPtr() {
|
||||
if (tiling_data_ptr_ != nullptr) {
|
||||
return;
|
||||
}
|
||||
auto kernel_json_info = kernel_pack_->kernel_json_info();
|
||||
auto op_para_size = kernel_json_info.op_para_size;
|
||||
if (op_para_size > 0) {
|
||||
auto ret = rtMalloc(&tiling_data_ptr_, op_para_size, RT_MEMORY_HBM);
|
||||
if (ret != RT_ERROR_NONE) {
|
||||
MS_LOG(EXCEPTION) << "rtMalloc tiling data failed";
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
bool DynamicTbeKernelMod::CopyTilingToDevice(void *stream_ptr) {
|
||||
InitTilingDataPtr();
|
||||
MS_EXCEPTION_IF_NULL(kernel_pack_);
|
||||
auto kernel_json_info = kernel_pack_->kernel_json_info();
|
||||
|
||||
auto op_para_size = kernel_json_info.op_para_size;
|
||||
if (tiling_data_.size() > op_para_size) {
|
||||
MS_LOG(EXCEPTION) << "Compute tiling size:" << tiling_data_.size()
|
||||
<< " larger than tbe build op_para_size:" << op_para_size;
|
||||
}
|
||||
|
||||
if (tiling_data_.empty() || tiling_data_ptr_ == nullptr) {
|
||||
MS_LOG(INFO) << "Tiling size is 0, skip aclrtMemcpyAsync";
|
||||
return true;
|
||||
}
|
||||
// cppcheck-suppress unreadVariable
|
||||
auto lock = AscendKernelMod::LockRuntime();
|
||||
auto ret = aclrtMemcpyAsync(tiling_data_ptr_, op_para_size, tiling_data_.c_str(), tiling_data_.size(),
|
||||
ACL_MEMCPY_HOST_TO_DEVICE, stream_ptr);
|
||||
if (ret != RT_ERROR_NONE) {
|
||||
MS_LOG(EXCEPTION) << "Tiling aclrtMemcpyAsync failed, ret:" << ret;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool DynamicTbeKernelMod::NeedSkipExecute(const CNodePtr &cnode) {
|
||||
// Skip run ReduceSum when axis is a Empty Tensor
|
||||
MS_EXCEPTION_IF_NULL(cnode);
|
||||
auto op_name = AnfAlgo::GetCNodeName(cnode);
|
||||
if (op_name != kReduceSumOpName) {
|
||||
return false;
|
||||
}
|
||||
|
||||
const size_t axes_index = 1;
|
||||
if (cnode->inputs().size() <= axes_index + 1) {
|
||||
return false;
|
||||
}
|
||||
auto input_axes = cnode->input(axes_index + 1);
|
||||
// cppcheck-suppress unreadVariable
|
||||
auto lock = AnfUtils::GetAbstractLock(input_axes.get());
|
||||
auto axes_abs = input_axes->abstract()->Clone();
|
||||
MS_EXCEPTION_IF_NULL(axes_abs);
|
||||
auto axes_shape = AnfAlgo::GetInputDeviceShape(cnode, axes_index);
|
||||
if (axes_abs->isa<abstract::AbstractTensor>()) {
|
||||
if (std::any_of(axes_shape.begin(), axes_shape.end(), [](ssize_t shape) { return shape == 0; })) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
bool DynamicTbeKernelMod::Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
|
||||
const std::vector<AddressPtr> &outputs, void *stream_ptr) {
|
||||
if (stream_ptr == nullptr) {
|
||||
MS_LOG(ERROR) << "stream_ptr should not be nullptr.";
|
||||
return false;
|
||||
}
|
||||
|
||||
if (kernel_pack_ == nullptr) {
|
||||
MS_LOG(ERROR) << "kernel pack should not be nullptr.";
|
||||
return false;
|
||||
}
|
||||
if (stream_ == nullptr) {
|
||||
stream_ = stream_ptr;
|
||||
}
|
||||
|
||||
auto node = anf_node_.lock();
|
||||
MS_EXCEPTION_IF_NULL(node);
|
||||
if (!node->isa<CNode>()) {
|
||||
MS_LOG(EXCEPTION) << "anfnode is not a cnode";
|
||||
}
|
||||
auto cnode = node->cast<CNodePtr>();
|
||||
MS_EXCEPTION_IF_NULL(cnode);
|
||||
|
||||
// is dynamic shape
|
||||
if (!AnfAlgo::IsDynamicShape(cnode)) {
|
||||
MS_LOG(EXCEPTION) << "The cnode is not dynamic shape:" << cnode->fullname_with_scope();
|
||||
}
|
||||
|
||||
if (!atomic_clean_nodes_.empty()) {
|
||||
for (auto atomic_clean_node : atomic_clean_nodes_) {
|
||||
KernelLaunchInfo kernel_launch_info;
|
||||
auto kernel_mod = AnfAlgo::GetKernelMod(atomic_clean_node);
|
||||
MS_EXCEPTION_IF_NULL(kernel_mod);
|
||||
device::KernelRuntime::GenLaunchArgs(*kernel_mod, atomic_clean_node, &kernel_launch_info);
|
||||
auto atomic_inputs = kernel_launch_info.inputs_;
|
||||
std::vector<AddressPtr> atomic_outputs;
|
||||
std::vector<AddressPtr> atomic_workspace;
|
||||
kernel_mod->Launch(atomic_inputs, atomic_workspace, atomic_outputs, stream_ptr);
|
||||
}
|
||||
}
|
||||
|
||||
// need skip, for reducesum empty input axis
|
||||
if (need_skip_execute_) {
|
||||
// Skip reduce if axis is a empty Tensor (shape = 0)
|
||||
MS_LOG(INFO) << "The node " << cnode->fullname_with_scope() << "Need Skip.";
|
||||
// cppcheck-suppress unreadVariable
|
||||
auto lock = AscendKernelMod::LockRuntime();
|
||||
rtError_t status = aclrtMemcpyAsync(outputs[0]->addr, inputs[0]->size, inputs[0]->addr, inputs[0]->size,
|
||||
ACL_MEMCPY_DEVICE_TO_DEVICE, stream_ptr);
|
||||
if (status != RT_ERROR_NONE) {
|
||||
MS_LOG(EXCEPTION) << "aclrtMemcpyAsync failed for " << cnode->fullname_with_scope();
|
||||
}
|
||||
|
||||
MS_LOG(INFO) << "Execute node:" << cnode->fullname_with_scope() << " success.";
|
||||
return true;
|
||||
}
|
||||
|
||||
// copy tiling to device
|
||||
if (!CopyTilingToDevice(stream_ptr)) {
|
||||
MS_LOG(EXCEPTION) << "Copy tiling to device failed. op name: " << cnode->fullname_with_scope();
|
||||
}
|
||||
|
||||
// pack all addresses into a vector.
|
||||
std::vector<void *> runtimeargs;
|
||||
(void)std::transform(std::begin(inputs), std::end(inputs), std::back_inserter(runtimeargs),
|
||||
[](const AddressPtr &input) -> void * { return input->addr; });
|
||||
(void)std::transform(std::begin(outputs), std::end(outputs), std::back_inserter(runtimeargs),
|
||||
[](const AddressPtr &output) -> void * { return output->addr; });
|
||||
if (!workspace.empty()) {
|
||||
(void)std::transform(std::begin(workspace), std::end(workspace), std::back_inserter(runtimeargs),
|
||||
[](const AddressPtr &addr) -> void * { return addr->addr; });
|
||||
}
|
||||
|
||||
if (!tiling_data_.empty() && tiling_data_ptr_ != nullptr) {
|
||||
runtimeargs.push_back(tiling_data_ptr_);
|
||||
}
|
||||
|
||||
rtL2Ctrl_t *l2ctrl = nullptr;
|
||||
auto args_size = static_cast<uint32_t>(UlongToUint(sizeof(void *)) * runtimeargs.size());
|
||||
auto node_info = cnode->fullname_with_scope();
|
||||
const auto dev_func =
|
||||
origin_key_.find("kernel0") != origin_key_.npos ? origin_key_ : origin_key_ + "_" + std::to_string(tiling_key_);
|
||||
const auto kernel_info = node_info + "/" + std::to_string(tiling_key_);
|
||||
// cppcheck-suppress unreadVariable
|
||||
auto lock = AscendKernelMod::LockRuntime();
|
||||
auto ret = rtKernelLaunchWithHandle(handle_, dev_func.c_str(), block_dim_, runtimeargs.data(), args_size, l2ctrl,
|
||||
stream_ptr, kernel_info.c_str());
|
||||
if (ret != RT_ERROR_NONE) {
|
||||
MS_LOG(ERROR) << "Call runtime rtKernelLaunchWithHandle error. Node info: " << node_info;
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
} // namespace kernel
|
||||
} // namespace mindspore
|
|
@ -0,0 +1,65 @@
|
|||
/**
|
||||
* Copyright 2021 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_TBE_DYNAMIC_TBE_KERNEL_MOD_H_
|
||||
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_TBE_DYNAMIC_TBE_KERNEL_MOD_H_
|
||||
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <utility>
|
||||
#include <map>
|
||||
#include "backend/kernel_compiler/tbe/tbe_kernel_mod.h"
|
||||
#include "backend/kernel_compiler/tbe/tbe_utils.h"
|
||||
#include "runtime/device/device_address.h"
|
||||
#include "ir/tensor.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace kernel {
|
||||
class DynamicTbeKernelMod : public TbeKernelMod {
|
||||
public:
|
||||
explicit DynamicTbeKernelMod(KernelPackPtr kernel_pack) : TbeKernelMod(kernel_pack) {} // maybe delete later
|
||||
DynamicTbeKernelMod(KernelPackPtr kernel_pack, const AnfNodePtr &anf_node_ptr);
|
||||
~DynamicTbeKernelMod() override;
|
||||
|
||||
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
|
||||
const std::vector<AddressPtr> &outputs, void *stream_ptr) override;
|
||||
|
||||
void InferOp() override;
|
||||
void InitOp() override;
|
||||
|
||||
private:
|
||||
void InferShapeRecursive();
|
||||
void InferShapeForNopNode(AnfNodePtr *input_node);
|
||||
std::string ParseCompileJson(const CNodePtr &cnode);
|
||||
void InitTilingDataPtr();
|
||||
bool CopyTilingToDevice(void *stream_ptr);
|
||||
bool NeedSkipExecute(const CNodePtr &cnode);
|
||||
|
||||
uint32_t block_dim_ = 1;
|
||||
std::string tiling_data_;
|
||||
void *tiling_data_ptr_ = nullptr;
|
||||
uint32_t tiling_key_{0};
|
||||
void *handle_ = nullptr;
|
||||
std::string origin_key_{""};
|
||||
std::string op_compile_info_{};
|
||||
bool need_skip_execute_ = false;
|
||||
};
|
||||
|
||||
using DynamicTbeKernelModPtr = std::shared_ptr<DynamicTbeKernelMod>;
|
||||
} // namespace kernel
|
||||
} // namespace mindspore
|
||||
#endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_TBE_DYNAMIC_TBE_KERNEL_MOD_H_
|
|
@ -15,6 +15,8 @@
|
|||
*/
|
||||
|
||||
#include "backend/kernel_compiler/tbe/tbe_kernel_mod.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include "runtime/rt.h"
|
||||
#include "utils/ms_context.h"
|
||||
#include "runtime/device/ascend/ge_runtime/task_info.h"
|
||||
|
@ -41,6 +43,20 @@ bool TbeKernelMod::Launch(const std::vector<mindspore::kernel::AddressPtr> &inpu
|
|||
if (stream_ == nullptr) {
|
||||
stream_ = stream_ptr;
|
||||
}
|
||||
// launch atomic_cleans first
|
||||
if (!atomic_clean_nodes_.empty()) {
|
||||
for (const auto &atomic_clean_node : atomic_clean_nodes_) {
|
||||
KernelLaunchInfo kernel_launch_info;
|
||||
auto kernel_mod = AnfAlgo::GetKernelMod(atomic_clean_node);
|
||||
MS_EXCEPTION_IF_NULL(kernel_mod);
|
||||
device::KernelRuntime::GenLaunchArgs(*kernel_mod, atomic_clean_node, &kernel_launch_info);
|
||||
auto atomic_inputs = kernel_launch_info.inputs_;
|
||||
std::vector<AddressPtr> atomic_outputs;
|
||||
std::vector<AddressPtr> atomic_workspace;
|
||||
kernel_mod->Launch(atomic_inputs, atomic_workspace, atomic_outputs, stream_ptr);
|
||||
}
|
||||
}
|
||||
|
||||
uint32_t blockdim = 1; // default blockdim equal to 1.
|
||||
auto func_stub = KernelManager::GenFuncStub(*kernel_pack_, false, &blockdim);
|
||||
if (func_stub == 0) {
|
||||
|
@ -61,6 +77,7 @@ bool TbeKernelMod::Launch(const std::vector<mindspore::kernel::AddressPtr> &inpu
|
|||
rtL2Ctrl_t *l2ctrl = nullptr;
|
||||
const void *stubFunc = reinterpret_cast<void *>(func_stub);
|
||||
auto argsSize = static_cast<uint32_t>(UlongToUint(sizeof(void *)) * runtimeargs.size());
|
||||
auto lock = AscendKernelMod::LockRuntime();
|
||||
auto ret = rtKernelLaunch(stubFunc, blockdim, runtimeargs.data(), argsSize, l2ctrl, stream_);
|
||||
if (ret != RT_ERROR_NONE) {
|
||||
MS_LOG(ERROR) << "Call runtime rtKernelLaunch error.";
|
||||
|
|
|
@ -29,6 +29,8 @@ namespace kernel {
|
|||
class TbeKernelMod : public AscendKernelMod {
|
||||
public:
|
||||
explicit TbeKernelMod(KernelPackPtr kernel_pack) : kernel_pack_(std::move(kernel_pack)) {}
|
||||
TbeKernelMod(KernelPackPtr kernel_pack, const AnfNodePtr &anf_node_ptr)
|
||||
: AscendKernelMod(anf_node_ptr), kernel_pack_(std::move(kernel_pack)) {}
|
||||
~TbeKernelMod() override = default;
|
||||
|
||||
void SetInputSizeList(const std::vector<size_t> &size_list) { input_size_list_ = size_list; }
|
||||
|
@ -45,7 +47,7 @@ class TbeKernelMod : public AscendKernelMod {
|
|||
device::DynamicKernelPtr GenDynamicKernel(const CNodePtr &cnode_ptr, void *stream_ptr) override;
|
||||
std::vector<size_t> GenParameters() override;
|
||||
|
||||
private:
|
||||
protected:
|
||||
KernelPackPtr kernel_pack_;
|
||||
std::vector<size_t> input_size_list_;
|
||||
std::vector<size_t> output_size_list_;
|
||||
|
|
|
@ -729,9 +729,8 @@ KernelWithIndex AnfRuntimeAlgorithm::GetPrevNodeOutput(const AnfNodePtr &anf_nod
|
|||
auto kernel_info = anf_node->kernel_info();
|
||||
if (kernel_info) {
|
||||
auto runtime_cache = kernel_info->runtime_cache();
|
||||
MS_EXCEPTION_IF_NULL(runtime_cache);
|
||||
if (runtime_cache->is_valid()) {
|
||||
auto output = runtime_cache->get_prev_node_output(input_idx);
|
||||
if (runtime_cache.runtime_cache().is_valid()) {
|
||||
auto output = runtime_cache.runtime_cache().get_prev_node_output(input_idx);
|
||||
if (output.first != nullptr) {
|
||||
return output;
|
||||
}
|
||||
|
@ -747,9 +746,8 @@ KernelWithIndex AnfRuntimeAlgorithm::GetPrevNodeOutput(const AnfNodePtr &anf_nod
|
|||
}
|
||||
if (kernel_info) {
|
||||
auto runtime_cache = kernel_info->runtime_cache();
|
||||
MS_EXCEPTION_IF_NULL(runtime_cache);
|
||||
if (runtime_cache->is_valid()) {
|
||||
runtime_cache->set_prev_node_output(input_idx, res);
|
||||
if (runtime_cache.runtime_cache().is_valid()) {
|
||||
runtime_cache.runtime_cache().set_prev_node_output(input_idx, res);
|
||||
}
|
||||
}
|
||||
return res;
|
||||
|
@ -2065,7 +2063,7 @@ std::vector<int64_t> AnfRuntimeAlgorithm::GetOutputMinShape(const AnfNodePtr &an
|
|||
}
|
||||
}
|
||||
|
||||
bool IsNodeInputDynamicShape(const CNodePtr &anf_node_ptr) {
|
||||
bool AnfRuntimeAlgorithm::IsNodeInputDynamicShape(const CNodePtr &anf_node_ptr) {
|
||||
MS_EXCEPTION_IF_NULL(anf_node_ptr);
|
||||
auto input_num = AnfAlgo::GetInputTensorNum(anf_node_ptr);
|
||||
for (size_t i = 0; i < input_num; ++i) {
|
||||
|
@ -2274,6 +2272,7 @@ void AnfRuntimeAlgorithm::InferShape(const CNodePtr &node, std::map<uint32_t, te
|
|||
AbstractBasePtrList args_spec_list;
|
||||
auto primitive = GetValueNode<PrimitivePtr>(inputs[0]);
|
||||
auto input_size = AnfAlgo::GetInputTensorNum(node);
|
||||
std::vector<AnfNodePtr> input_nodes;
|
||||
for (size_t i = 0; i < input_size; ++i) {
|
||||
auto input_with_index = AnfAlgo::GetPrevNodeOutput(node, i);
|
||||
auto real_input = input_with_index.first;
|
||||
|
@ -2289,9 +2288,12 @@ void AnfRuntimeAlgorithm::InferShape(const CNodePtr &node, std::map<uint32_t, te
|
|||
// sync data from device to host
|
||||
tensor_ptr->data_sync();
|
||||
}
|
||||
auto real_abs = real_input->abstract();
|
||||
// cppcheck-suppress unreadVariable
|
||||
auto lock = AnfUtils::GetAbstractLock(real_input.get());
|
||||
MS_EXCEPTION_IF_NULL(real_input->abstract());
|
||||
auto real_abs = real_input->abstract()->Clone();
|
||||
if (real_abs->isa<abstract::AbstractTensor>()) {
|
||||
real_input->abstract()->set_value(tensor_ptr);
|
||||
real_abs->set_value(tensor_ptr);
|
||||
} else if (real_abs->isa<abstract::AbstractTuple>()) {
|
||||
auto tuple_get_item_index = AnfAlgo::GetTupleGetItemOutIndex(cnode_input->cast<CNodePtr>());
|
||||
auto abstract_tuple = real_abs->cast<abstract::AbstractTuplePtr>();
|
||||
|
@ -2299,15 +2301,27 @@ void AnfRuntimeAlgorithm::InferShape(const CNodePtr &node, std::map<uint32_t, te
|
|||
auto tuple_elements = abstract_tuple->elements()[tuple_get_item_index];
|
||||
tuple_elements->set_value(tensor_ptr);
|
||||
}
|
||||
real_input->set_abstract(real_abs);
|
||||
}
|
||||
}
|
||||
AddArgList(&args_spec_list, cnode_input, real_input, i);
|
||||
bool is_cnode_input = AddArgList(&args_spec_list, cnode_input, real_input, i);
|
||||
if (is_cnode_input) {
|
||||
input_nodes.push_back(cnode_input);
|
||||
} else {
|
||||
input_nodes.push_back(real_input);
|
||||
}
|
||||
}
|
||||
std::vector<AbstractScope> locks;
|
||||
std::transform(input_nodes.begin(), input_nodes.end(), std::back_inserter(locks),
|
||||
[](const AnfNodePtr &input) { return AnfUtils::GetAbstractLock(input.get()); });
|
||||
auto eval_result = opt::CppInferShape(primitive, args_spec_list);
|
||||
locks.clear();
|
||||
// cppcheck-suppress unreadVariable
|
||||
auto lock = AnfUtils::GetAbstractLock(node.get());
|
||||
node->set_abstract(eval_result);
|
||||
}
|
||||
|
||||
void AnfRuntimeAlgorithm::AddArgList(AbstractBasePtrList *args_spec_list, const AnfNodePtr &cnode_input,
|
||||
bool AnfRuntimeAlgorithm::AddArgList(AbstractBasePtrList *args_spec_list, const AnfNodePtr &cnode_input,
|
||||
const AnfNodePtr &real_input, size_t index) {
|
||||
if (AnfAlgo::CheckPrimitiveType(cnode_input, prim::kPrimTupleGetItem)) {
|
||||
auto base_shape = real_input->Shape();
|
||||
|
@ -2315,15 +2329,24 @@ void AnfRuntimeAlgorithm::AddArgList(AbstractBasePtrList *args_spec_list, const
|
|||
MS_LOG(EXCEPTION) << "Node input is a tuple_get_item but real input node shape is not a TupleShape. trace: "
|
||||
<< trace::DumpSourceLines(real_input);
|
||||
}
|
||||
// cppcheck-suppress unreadVariable
|
||||
auto lock = AnfUtils::GetAbstractLock(real_input.get());
|
||||
auto abs = real_input->abstract()->cast<abstract::AbstractTuplePtr>();
|
||||
MS_EXCEPTION_IF_NULL(abs);
|
||||
auto tuple_get_item_indexk = AnfAlgo::GetTupleGetItemOutIndex(cnode_input->cast<CNodePtr>());
|
||||
auto abs_i = abs->elements()[tuple_get_item_indexk];
|
||||
(void)args_spec_list->emplace_back(abs_i);
|
||||
return false;
|
||||
} else if (cnode_input->isa<CNode>() && AnfAlgo::GetCNodeName(cnode_input) == prim::kPrimReshape->name()) {
|
||||
// cppcheck-suppress unreadVariable
|
||||
auto lock = AnfUtils::GetAbstractLock(cnode_input.get());
|
||||
(void)args_spec_list->emplace_back(cnode_input->abstract());
|
||||
return true;
|
||||
} else {
|
||||
// cppcheck-suppress unreadVariable
|
||||
auto lock = AnfUtils::GetAbstractLock(real_input.get());
|
||||
(void)args_spec_list->emplace_back(real_input->abstract());
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -288,6 +288,7 @@ class AnfRuntimeAlgorithm {
|
|||
static TypeId GetCNodeOutputPrecision(const AnfNodePtr &node);
|
||||
// get fix output precision from prev node, input_idx is the input index of current node related to prev node.
|
||||
static TypeId GetPrevNodeOutputPrecision(const AnfNodePtr &node, size_t input_idx);
|
||||
static bool IsNodeInputDynamicShape(const CNodePtr &anf_node_ptr);
|
||||
static bool IsDynamicShape(const AnfNodePtr &node);
|
||||
static bool HasDynamicShapeFlag(const PrimitivePtr &prim);
|
||||
static bool IsCondControlKernel(const CNodePtr &node);
|
||||
|
@ -302,7 +303,8 @@ class AnfRuntimeAlgorithm {
|
|||
static bool IsNodeDynamicShape(const AnfNodePtr &node);
|
||||
static bool IsHostKernel(const CNodePtr &node);
|
||||
static void InferShape(const CNodePtr &node, std::map<uint32_t, tensor::TensorPtr> *depend_tensors = nullptr);
|
||||
static void AddArgList(AbstractBasePtrList *args_spec_list, const AnfNodePtr &cnode_input,
|
||||
// Returns true if cnode_input's abstract was appended to args_spec_list, false if real_input's abstract was used.
|
||||
static bool AddArgList(AbstractBasePtrList *args_spec_list, const AnfNodePtr &cnode_input,
|
||||
const AnfNodePtr &real_input, size_t index);
|
||||
static std::vector<size_t> GetInputRealDeviceShapeIfExist(const AnfNodePtr &anf_node, size_t index);
|
||||
static std::vector<size_t> GetOutputRealDeviceShapeIfExist(const AnfNodePtr &anf_node, size_t index);
|
||||
|
|
|
@ -123,8 +123,7 @@ void AscendEnableDynamicRuntimeCache(const KernelGraph *graph) {
|
|||
}
|
||||
MS_EXCEPTION_IF_NULL(kernel_info);
|
||||
auto runtime_cache = kernel_info->runtime_cache();
|
||||
MS_EXCEPTION_IF_NULL(runtime_cache);
|
||||
runtime_cache->set_valid();
|
||||
runtime_cache.runtime_cache().set_valid();
|
||||
}
|
||||
}
|
||||
} // namespace
|
||||
|
|
|
@ -37,21 +37,21 @@ class OpTilingCalculateAdapter {
|
|||
OpTilingCalculateAdapter() = default;
|
||||
~OpTilingCalculateAdapter() = default;
|
||||
|
||||
ge::Operator AnfNodeToGeNodeAdapter(const CNodePtr &node, ge::ComputeGraphPtr *ge_graph,
|
||||
const std::map<uint32_t, tensor::TensorPtr> &depend_tensor_map,
|
||||
const std::string &op_compile_info);
|
||||
::ge::Operator AnfNodeToGeNodeAdapter(const CNodePtr &node, ::ge::ComputeGraphPtr *ge_graph,
|
||||
const std::map<uint32_t, tensor::TensorPtr> &depend_tensor_map,
|
||||
const std::string &op_compile_info);
|
||||
|
||||
private:
|
||||
void ConvertInputShapeAndType(const CNodePtr &node, ge::OpDescPtr *op_desc);
|
||||
void ConvertOutputShapeAndType(const CNodePtr &node, ge::OpDescPtr *op_desc);
|
||||
void ConvertCompileInfo(const CNodePtr &node, ge::OpDescPtr *op_desc);
|
||||
void ConvertAttrs(const CNodePtr &node, ge::OpDescPtr *op_desc);
|
||||
std::vector<std::tuple<std::size_t, ge::NodePtr>> ConvertDepends(
|
||||
const CNodePtr &node, const std::map<uint32_t, tensor::TensorPtr> &depend_tensor_map, ge::OpDescPtr *op_desc,
|
||||
ge::ComputeGraphPtr *ge_graph);
|
||||
ge::NodePtr NewConstantOp(const CNodePtr &node, const std::string &name, const tensor::TensorPtr &tensor_data,
|
||||
ge::ComputeGraphPtr *ge_graph, size_t index);
|
||||
void AddEdge(const ge::NodePtr &ge_node, const std::vector<std::tuple<std::size_t, ge::NodePtr>> &constant_ops);
|
||||
void ConvertInputShapeAndType(const CNodePtr &node, ::ge::OpDescPtr *op_desc);
|
||||
void ConvertOutputShapeAndType(const CNodePtr &node, ::ge::OpDescPtr *op_desc);
|
||||
void ConvertCompileInfo(const CNodePtr &node, ::ge::OpDescPtr *op_desc);
|
||||
void ConvertAttrs(const CNodePtr &node, ::ge::OpDescPtr *op_desc);
|
||||
std::vector<std::tuple<std::size_t, ::ge::NodePtr>> ConvertDepends(
|
||||
const CNodePtr &node, const std::map<uint32_t, tensor::TensorPtr> &depend_tensor_map, ::ge::OpDescPtr *op_desc,
|
||||
::ge::ComputeGraphPtr *ge_graph);
|
||||
::ge::NodePtr NewConstantOp(const CNodePtr &node, const std::string &name, const tensor::TensorPtr &tensor_data,
|
||||
::ge::ComputeGraphPtr *ge_graph, size_t index);
|
||||
void AddEdge(const ::ge::NodePtr &ge_node, const std::vector<std::tuple<std::size_t, ::ge::NodePtr>> &constant_ops);
|
||||
std::string GetRealOpType(const std::string &op_type);
|
||||
std::string GetInputName(const CNodePtr &node, size_t index);
|
||||
std::string GetOutputName(const CNodePtr &node, size_t index);
|
||||
|
|
|
@ -103,7 +103,7 @@ void DynamicKernel::InferShape() {
|
|||
tuple_elements->set_value(out_tensor);
|
||||
}
|
||||
}
|
||||
AnfAlgo::AddArgList(&args_spec_list, cnode_input, real_input, i);
|
||||
(void)AnfAlgo::AddArgList(&args_spec_list, cnode_input, real_input, i);
|
||||
}
|
||||
auto eval_result = opt::CppInferShape(primitive, args_spec_list);
|
||||
cnode->set_abstract(eval_result);
|
||||
|
|
|
@ -164,8 +164,7 @@ class DeviceContext {
|
|||
}
|
||||
MS_EXCEPTION_IF_NULL(kernel_info);
|
||||
auto runtime_cache = kernel_info->runtime_cache();
|
||||
MS_EXCEPTION_IF_NULL(runtime_cache);
|
||||
runtime_cache->set_valid();
|
||||
runtime_cache.runtime_cache().set_valid();
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -28,8 +28,21 @@
|
|||
#include "ir/func_graph.h"
|
||||
#include "ir/primitive.h"
|
||||
#include "utils/ms_context.h"
|
||||
#include "utils/anf_utils.h"
|
||||
|
||||
namespace mindspore {
|
||||
/// \brief Read accessor for the abstract value stored on this node.
///
/// The per-node lock handed out by AnfUtils::GetAbstractLock is held while the member is read and
/// is released when this function returns, i.e. before the caller uses the returned reference.
///
/// \return Reference to the stored abstract pointer.
const AbstractBasePtr &AnfNode::abstract() const {
  // cppcheck-suppress unreadVariable
  AbstractScope guard = AnfUtils::GetAbstractLock(this);
  return abstract_;
}
|
||||
|
||||
void AnfNode::set_abstract(const AbstractBasePtr &abs) {
|
||||
// cppcheck-suppress unreadVariable
|
||||
auto lock = AnfUtils::GetAbstractLock(this);
|
||||
abstract_ = abs;
|
||||
}
|
||||
|
||||
// namespace to support intermediate representation definition
|
||||
CNode::CNode(const std::vector<AnfNodePtr> &inputs, const FuncGraphPtr &func_graph)
|
||||
: AnfNode(func_graph),
|
||||
|
@ -574,9 +587,8 @@ std::string GetCNodeTarget(const AnfNodePtr &node) {
|
|||
auto kernel_info = node->kernel_info();
|
||||
if (kernel_info != nullptr) {
|
||||
auto runtime_cache = kernel_info->runtime_cache();
|
||||
MS_EXCEPTION_IF_NULL(runtime_cache);
|
||||
if (runtime_cache->is_valid()) {
|
||||
auto tmp_target = runtime_cache->device_target();
|
||||
if (runtime_cache.runtime_cache().is_valid()) {
|
||||
auto tmp_target = runtime_cache.runtime_cache().device_target();
|
||||
if (!tmp_target.empty()) {
|
||||
return tmp_target;
|
||||
}
|
||||
|
@ -595,9 +607,8 @@ std::string GetCNodeTarget(const AnfNodePtr &node) {
|
|||
|
||||
if (kernel_info != nullptr) {
|
||||
auto runtime_cache = kernel_info->runtime_cache();
|
||||
MS_EXCEPTION_IF_NULL(runtime_cache);
|
||||
if (runtime_cache->is_valid()) {
|
||||
runtime_cache->set_device_target(target);
|
||||
if (runtime_cache.runtime_cache().is_valid()) {
|
||||
runtime_cache.runtime_cache().set_device_target(target);
|
||||
}
|
||||
}
|
||||
return target;
|
||||
|
|
|
@ -178,12 +178,12 @@ class MS_CORE_API AnfNode : public Base {
|
|||
/// \brief Obtain the inferred abstract value of this AnfNode.
|
||||
///
|
||||
/// \return The inferred abstract value.
|
||||
const AbstractBasePtr &abstract() const { return abstract_; }
|
||||
const AbstractBasePtr &abstract() const;
|
||||
|
||||
/// \brief Set the abstract value of this AnfNode.
|
||||
///
|
||||
/// \param[in] abs New abstract value.
|
||||
void set_abstract(const AbstractBasePtr &abs) { abstract_ = abs; }
|
||||
void set_abstract(const AbstractBasePtr &abs);
|
||||
|
||||
/// \brief Obtain the intermediate abstract value of this AnfNode.
|
||||
///
|
||||
|
|
|
@ -24,12 +24,21 @@
|
|||
#include "ir/visitor.h"
|
||||
#include "ir/func_graph.h"
|
||||
#include "base/core_ops.h"
|
||||
#include "utils/anf_utils.h"
|
||||
|
||||
namespace mindspore {
|
||||
// namespace to support intermediate representation definition
|
||||
// Methods of AnfNode
|
||||
TypePtr AnfNode::Type() const { return (abstract_ == nullptr) ? nullptr : abstract_->BuildType(); }
|
||||
BaseShapePtr AnfNode::Shape() const { return (abstract_ == nullptr) ? nullptr : abstract_->BuildShape(); }
|
||||
// Builds the type from the node's abstract while holding the per-node abstract lock.
// Returns nullptr when no abstract has been set.
TypePtr AnfNode::Type() const {
  // cppcheck-suppress unreadVariable
  AbstractScope guard = AnfUtils::GetAbstractLock(this);
  if (abstract_ == nullptr) {
    return nullptr;
  }
  return abstract_->BuildType();
}
|
||||
// Builds the shape from the node's abstract while holding the per-node abstract lock.
// Returns nullptr when no abstract has been set.
BaseShapePtr AnfNode::Shape() const {
  // cppcheck-suppress unreadVariable
  AbstractScope guard = AnfUtils::GetAbstractLock(this);
  if (abstract_ == nullptr) {
    return nullptr;
  }
  return abstract_->BuildShape();
}
|
||||
|
||||
std::string AnfNode::ToString() const {
|
||||
return mindspore::label_manage::Label(const_cast<AnfNode *>(this)->shared_from_base<AnfNode>()->debug_info());
|
||||
|
|
|
@ -68,13 +68,26 @@ class RuntimeCache {
|
|||
// Interface for device kernel program information.
|
||||
class KernelInfoDevice {
|
||||
public:
|
||||
class RuntimeCacheScope {
|
||||
public:
|
||||
RuntimeCacheScope(RuntimeCache &base, std::mutex &mu) : runtime_cache_(base), mu_(mu) { mu_.lock(); }
|
||||
RuntimeCacheScope(const RuntimeCacheScope &other) = delete;
|
||||
RuntimeCacheScope operator=(const RuntimeCacheScope &other) = delete;
|
||||
~RuntimeCacheScope() { mu_.unlock(); }
|
||||
RuntimeCache &runtime_cache() { return runtime_cache_; }
|
||||
|
||||
private:
|
||||
RuntimeCache &runtime_cache_;
|
||||
std::mutex &mu_;
|
||||
};
|
||||
// If kernel program was built and build info is set.
|
||||
virtual bool has_build_info() const = 0;
|
||||
|
||||
RuntimeCache *runtime_cache() { return &runtime_cache_; }
|
||||
RuntimeCacheScope runtime_cache() { return RuntimeCacheScope(runtime_cache_, mu_); }
|
||||
|
||||
private:
|
||||
RuntimeCache runtime_cache_;
|
||||
std::mutex mu_;
|
||||
};
|
||||
using KernelInfoDevicePtr = std::shared_ptr<KernelInfoDevice>;
|
||||
} // namespace mindspore
|
||||
|
|
|
@ -15,6 +15,7 @@
|
|||
*/
|
||||
|
||||
#include "utils/anf_utils.h"
|
||||
#include <map>
|
||||
#include <string>
|
||||
#include "base/core_ops.h"
|
||||
#include "utils/trace_base.h"
|
||||
|
@ -23,8 +24,52 @@
|
|||
namespace mindspore {
|
||||
namespace {
|
||||
const PrimitiveSet follow_first_input_prims = {prim::kPrimDepend, prim::kPrimLoad};
|
||||
|
||||
class AbstractMutexManager {
|
||||
public:
|
||||
static AbstractMutexManager &GetInstance() {
|
||||
static AbstractMutexManager instance;
|
||||
return instance;
|
||||
}
|
||||
|
||||
AbstractScope GetAbstractLock(const AnfNode *node) {
|
||||
std::lock_guard<std::recursive_mutex> lock(mu_);
|
||||
return AbstractScope(&mu_for_nodes_[node]);
|
||||
}
|
||||
|
||||
private:
|
||||
std::map<const AnfNode *, std::recursive_mutex> mu_for_nodes_;
|
||||
std::recursive_mutex mu_;
|
||||
};
|
||||
} // namespace
|
||||
|
||||
// Takes a non-null mutex pointer and locks it; the matching unlock happens in the destructor.
// A null pointer is rejected via MS_EXCEPTION_IF_NULL before any locking is attempted.
AbstractScope::AbstractScope(std::recursive_mutex *mu) : mu_(mu) {
  MS_EXCEPTION_IF_NULL(mu);
  mu_->lock();
}
|
||||
|
||||
// Move construction transfers responsibility for unlocking: the source is left with a null
// mutex pointer so that its destructor does nothing.
AbstractScope::AbstractScope(AbstractScope &&other) : mu_(other.mu_) { other.mu_ = nullptr; }
|
||||
|
||||
// Move assignment: releases the mutex currently held by this scope (if any) and takes over the
// mutex held by `other`, leaving `other` empty so its destructor does not unlock.
//
// Self-assignment is a no-op. Without the guard the pointer would be copied onto itself and then
// nulled, so the held mutex would never be unlocked.
AbstractScope &AbstractScope::operator=(AbstractScope &&other) {
  if (this == &other) {
    return *this;
  }
  if (mu_ != nullptr) {
    // Overwriting mu_ without unlocking would leave the previously held mutex locked forever.
    mu_->unlock();
  }
  mu_ = other.mu_;
  other.mu_ = nullptr;
  return *this;
}
|
||||
|
||||
// Unlocks the held mutex. A moved-from scope has a null pointer and releases nothing.
AbstractScope::~AbstractScope() {
  if (mu_ == nullptr) {
    return;
  }
  mu_->unlock();
}
|
||||
|
||||
// Forwards to the AbstractMutexManager singleton and returns the scope object that holds the
// lock for `node`.
AbstractScope AnfUtils::GetAbstractLock(const AnfNode *node) {
  AbstractMutexManager &manager = AbstractMutexManager::GetInstance();
  return manager.GetAbstractLock(node);
}
|
||||
|
||||
bool AnfUtils::IsDimUnknown(const abstract::ShapePtr &shape) {
|
||||
MS_EXCEPTION_IF_NULL(shape);
|
||||
return std::any_of(shape->shape().begin(), shape->shape().end(), [](int64_t s) { return s < -1; });
|
||||
|
@ -112,20 +157,18 @@ bool AnfUtils::IsRealKernel(const AnfNodePtr &node) {
|
|||
auto kernel_info = cnode->kernel_info();
|
||||
if (kernel_info) {
|
||||
auto runtime_cache = kernel_info->runtime_cache();
|
||||
MS_EXCEPTION_IF_NULL(runtime_cache);
|
||||
if (runtime_cache->is_real_kernel() != CacheBool::UNCACHED) {
|
||||
return (runtime_cache->is_real_kernel() == CacheBool::TRUE);
|
||||
if (runtime_cache.runtime_cache().is_real_kernel() != CacheBool::UNCACHED) {
|
||||
return (runtime_cache.runtime_cache().is_real_kernel() == CacheBool::TRUE);
|
||||
}
|
||||
}
|
||||
bool res = !IsOneOfPrimitive(cnode->input(kAnfPrimitiveIndex), virtual_prims);
|
||||
|
||||
if (kernel_info) {
|
||||
auto runtime_cache = kernel_info->runtime_cache();
|
||||
MS_EXCEPTION_IF_NULL(runtime_cache);
|
||||
if (res) {
|
||||
runtime_cache->set_real_kernel(CacheBool::TRUE);
|
||||
runtime_cache.runtime_cache().set_real_kernel(CacheBool::TRUE);
|
||||
} else {
|
||||
runtime_cache->set_real_kernel(CacheBool::FALSE);
|
||||
runtime_cache.runtime_cache().set_real_kernel(CacheBool::FALSE);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -175,10 +218,15 @@ size_t AnfUtils::GetInputTensorNum(const AnfNodePtr &node) {
|
|||
MS_LOG(EXCEPTION) << "Only cnode has real input, but this anf is " << node->DebugString()
|
||||
<< trace::DumpSourceLines(node);
|
||||
}
|
||||
ssize_t input_tensor_num = cnode->input_tensor_num();
|
||||
if (input_tensor_num >= 0) {
|
||||
return static_cast<size_t>(input_tensor_num);
|
||||
{
|
||||
// cppcheck-suppress unreadVariable
|
||||
auto lock = AnfUtils::GetAbstractLock(node.get());
|
||||
ssize_t input_tensor_num = cnode->input_tensor_num();
|
||||
if (input_tensor_num >= 0) {
|
||||
return static_cast<size_t>(input_tensor_num);
|
||||
}
|
||||
}
|
||||
|
||||
size_t input_num = cnode->inputs().size();
|
||||
if (input_num == 0) {
|
||||
MS_LOG(EXCEPTION) << "Cnode inputs size can't be zero" << trace::DumpSourceLines(node);
|
||||
|
@ -191,6 +239,8 @@ size_t AnfUtils::GetInputTensorNum(const AnfNodePtr &node) {
|
|||
auto &inputs = cnode->inputs();
|
||||
// Search monad inputs, backward.
|
||||
for (auto iter = inputs.rbegin(); iter != inputs.rend(); ++iter) {
|
||||
// cppcheck-suppress unreadVariable
|
||||
auto lock = AnfUtils::GetAbstractLock(node.get());
|
||||
if (!HasAbstractMonad(*iter)) {
|
||||
// Stop count if we encounter a non-monad input.
|
||||
break;
|
||||
|
@ -198,6 +248,8 @@ size_t AnfUtils::GetInputTensorNum(const AnfNodePtr &node) {
|
|||
--input_num;
|
||||
}
|
||||
}
|
||||
// cppcheck-suppress unreadVariable
|
||||
auto lock = AnfUtils::GetAbstractLock(node.get());
|
||||
cnode->set_input_tensor_num(static_cast<ssize_t>(input_num));
|
||||
return input_num;
|
||||
}
|
||||
|
@ -207,8 +259,8 @@ size_t AnfUtils::GetOutputTensorNum(const AnfNodePtr &node) {
|
|||
auto kernel_info = node->kernel_info();
|
||||
if (kernel_info) {
|
||||
auto runtime_cache = kernel_info->runtime_cache();
|
||||
if (runtime_cache->is_valid()) {
|
||||
ssize_t output_tensor_num = runtime_cache->output_tensor_num();
|
||||
if (runtime_cache.runtime_cache().is_valid()) {
|
||||
ssize_t output_tensor_num = runtime_cache.runtime_cache().output_tensor_num();
|
||||
if (output_tensor_num >= 0) {
|
||||
return static_cast<size_t>(output_tensor_num);
|
||||
}
|
||||
|
@ -231,8 +283,8 @@ size_t AnfUtils::GetOutputTensorNum(const AnfNodePtr &node) {
|
|||
|
||||
if (kernel_info) {
|
||||
auto runtime_cache = kernel_info->runtime_cache();
|
||||
if (runtime_cache->is_valid()) {
|
||||
runtime_cache->set_output_tensor_num(static_cast<ssize_t>(res));
|
||||
if (runtime_cache.runtime_cache().is_valid()) {
|
||||
runtime_cache.runtime_cache().set_output_tensor_num(static_cast<ssize_t>(res));
|
||||
}
|
||||
}
|
||||
return res;
|
||||
|
|
|
@ -25,6 +25,19 @@
|
|||
#include "ir/primitive.h"
|
||||
|
||||
namespace mindspore {
|
||||
// Move-only handle for a std::recursive_mutex. The copy operations are deleted and the move
// operations are declared here; locking and unlocking are implemented in the out-of-line definitions.
class AbstractScope {
 public:
  explicit AbstractScope(std::recursive_mutex *mu);
  AbstractScope(const AbstractScope &other) = delete;
  AbstractScope &operator=(const AbstractScope &other) = delete;
  AbstractScope(AbstractScope &&other);
  AbstractScope &operator=(AbstractScope &&other);
  ~AbstractScope();

 private:
  // Mutex referenced by this scope.
  std::recursive_mutex *mu_{nullptr};
};
|
||||
|
||||
class AnfUtils {
|
||||
public:
|
||||
static bool IsDimUnknown(const abstract::ShapePtr &shape);
|
||||
|
@ -52,6 +65,7 @@ class AnfUtils {
|
|||
static void SetDumpFlag(const AnfNodePtr &node);
|
||||
// Get dump flag from CNode's primitive.
|
||||
static bool GetDumpFlag(const AnfNodePtr &node);
|
||||
static AbstractScope GetAbstractLock(const AnfNode *node);
|
||||
};
|
||||
} // namespace mindspore
|
||||
#endif // MINDSPORE_CORE_UTILS_ANF_UTILS_H_
|
||||
|
|
|
@ -182,6 +182,13 @@ file(GLOB_RECURSE MINDSPORE_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR}
|
|||
"../../../mindspore/ccsrc/profiler/device/ascend/*.cc"
|
||||
"../../../mindspore/ccsrc/profiler/device/profiling.cc"
|
||||
"../../../mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/adam_fp32.c"
|
||||
"../../../mindspore/ccsrc/backend/kernel_compiler/kernel.cc"
|
||||
"../../../mindspore/ccsrc/backend/kernel_compiler/ascend_kernel_mod.cc"
|
||||
"../../../mindspore/ccsrc/backend/optimizer/common/helper.cc"
|
||||
"../../../mindspore/ccsrc/runtime/device/ascend/executor/tiling/op_tiling_adapter.cc"
|
||||
"../../../mindspore/ccsrc/runtime/device/ascend/executor/aicpu_ext_info_handle.cc"
|
||||
"../../../mindspore/ccsrc/runtime/device/ascend/ge_types_convert.cc"
|
||||
"../../../mindspore/ccsrc/backend/kernel_compiler/aicpu/aicpu_util.cc"
|
||||
)
|
||||
|
||||
if(ENABLE_SECURITY)
|
||||
|
@ -230,6 +237,24 @@ add_dependencies(_ut_ut_obj engine-cache-server graph)
|
|||
add_executable(ut_tests $<TARGET_OBJECTS:_ut_ut_obj>
|
||||
$<TARGET_OBJECTS:_ut_mindspore_obj>)
|
||||
|
||||
include_directories("${CMAKE_BINARY_DIR}/backend/kernel_compiler/aicpu")
|
||||
file(GLOB_RECURSE PROTO_IN RELATIVE ${CMAKE_CURRENT_SOURCE_DIR}
|
||||
"../../../mindspore/ccsrc/backend/kernel_compiler/aicpu/proto/*.proto")
|
||||
ms_protobuf_generate(PROTOSRCS PROTOHDRS ${PROTO_IN})
|
||||
|
||||
file(GLOB_RECURSE PROTO_DUMP RELATIVE ${CMAKE_CURRENT_SOURCE_DIR}
|
||||
"../../../mindspore/ccsrc/runtime/device/ascend/dump/proto/*.proto")
|
||||
ms_protobuf_generate(DUMP_PROTOSRCS PROTOHDRS ${PROTO_DUMP})
|
||||
|
||||
list(APPEND MINDSPORE_PROTO_LIST ${PROTOSRCS})
|
||||
list(APPEND MINDSPORE_PROTO_LIST ${PREDICT_PROTOSRCS})
|
||||
list(APPEND MINDSPORE_PROTO_LIST ${DUMP_PROTOSRCS})
|
||||
|
||||
if(MINDSPORE_PROTO_LIST)
|
||||
add_library(proto_input_ut STATIC ${MINDSPORE_PROTO_LIST})
|
||||
set_target_properties(proto_input_ut PROPERTIES COMPILE_FLAGS "-Wno-unused-variable")
|
||||
endif()
|
||||
|
||||
if(ENABLE_GE)
|
||||
if(ENABLE_TRAIN)
|
||||
target_link_libraries(ut_tests PRIVATE graph ge_runner)
|
||||
|
|
|
@ -0,0 +1,75 @@
|
|||
/**
|
||||
* Copyright 2021 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "register/op_tiling_info.h"
|
||||
#include "register/op_tiling.h"
|
||||
|
||||
namespace optiling {
|
||||
using std::make_shared;
|
||||
extern "C" ge::graphStatus OpParaCalculateV2(const ge::Operator &op, OpRunInfoV2 &run_info) {
|
||||
return ge::GRAPH_SUCCESS;
|
||||
}
|
||||
|
||||
namespace utils {
|
||||
OpRunInfo::OpRunInfo() {}
|
||||
|
||||
OpRunInfo::OpRunInfo(const uint32_t &block_dim, const bool &clear_atomic, const uint64_t &tiling_key) {}
|
||||
|
||||
OpRunInfo::OpRunInfo(const OpRunInfo &runinfo) {}
|
||||
|
||||
OpRunInfo::OpRunInfo(OpRunInfo &&runinfo) {}
|
||||
|
||||
OpRunInfo &OpRunInfo::operator=(const OpRunInfo &runinfo) { return *this; }
|
||||
|
||||
OpRunInfo &OpRunInfo::operator=(OpRunInfo &&runinfo) { return *this; }
|
||||
|
||||
void OpRunInfo::SetBlockDim(const uint32_t &block_dim) { return; }
|
||||
|
||||
uint32_t OpRunInfo::GetBlockDim() const { return 0; }
|
||||
|
||||
void OpRunInfo::AddWorkspace(const int64_t &workspace) { return; }
|
||||
|
||||
size_t OpRunInfo::GetWorkspaceNum() const { return 0; }
|
||||
|
||||
ge::graphStatus OpRunInfo::GetWorkspace(const size_t &idx, int64_t &workspace) const { return ge::GRAPH_SUCCESS; }
|
||||
|
||||
void OpRunInfo::GetAllWorkspaces(std::vector<int64_t> &workspaces) const { return; }
|
||||
|
||||
void OpRunInfo::SetWorkspaces(const std::vector<int64_t> &workspaces) { return; }
|
||||
|
||||
void OpRunInfo::InternelSetTiling(const ByteBuffer &value) { return; }
|
||||
|
||||
void OpRunInfo::AddTilingData(const char *_value, size_t _size) { return; }
|
||||
|
||||
ByteBuffer &OpRunInfo::GetAllTilingData() {
|
||||
std::shared_ptr<ByteBuffer> tiling_data = std::make_shared<ByteBuffer>();
|
||||
return *tiling_data;
|
||||
}
|
||||
|
||||
const ByteBuffer &OpRunInfo::GetAllTilingData() const {
|
||||
std::shared_ptr<ByteBuffer> tiling_data = std::make_shared<ByteBuffer>();
|
||||
return *tiling_data;
|
||||
}
|
||||
|
||||
void OpRunInfo::SetClearAtomic(bool clear_atomic_input) { return; }
|
||||
|
||||
bool OpRunInfo::GetClearAtomic() const { return true; }
|
||||
|
||||
void OpRunInfo::SetTilingKey(const uint64_t &new_tiling_key) { return; }
|
||||
|
||||
uint64_t OpRunInfo::GetTilingKey() const { return 0; }
|
||||
} // namespace utils
|
||||
} // namespace optiling
|
|
@ -211,3 +211,9 @@ RTS_API rtError_t rtMemGetInfoEx(rtMemInfoType_t memInfoType, size_t *free, size
|
|||
// Stub: ignores the module id and the callback and always returns RT_ERROR_NONE.
RTS_API rtError_t rtProfRegisterCtrlCallback(uint32_t moduleId, rtProfCtrlHandle callback) { return RT_ERROR_NONE; }
|
||||
|
||||
// Stub: ignores all arguments (the output pointer is not written) and always returns RT_ERROR_NONE.
RTS_API rtError_t rtGetRtCapability(rtFeatureType_t, int32_t, int64_t *) { return RT_ERROR_NONE; }
|
||||
|
||||
// Stub: ignores every argument, launches nothing and always returns RT_ERROR_NONE.
RTS_API rtError_t rtKernelLaunchWithHandle(void *, const void *, uint32_t, void *, uint32_t, rtSmDesc_t *, rtStream_t,
                                           const void *) {
  return RT_ERROR_NONE;
}
|
||||
|
|
Loading…
Reference in New Issue