!45448 delete useless output for acl adapter

Merge pull request !45448 from liubuyu/acl_launch
This commit is contained in:
i-robot 2022-11-16 08:47:53 +00:00 committed by Gitee
commit 765e7a3c0b
No known key found for this signature in database
GPG Key ID: 173E9B9CA92EEF8F
7 changed files with 114 additions and 42 deletions

View File

@ -29,6 +29,7 @@ namespace mindspore {
namespace kernel {
namespace {
constexpr size_t kMaxAttrToInputSize = 1024;
constexpr auto kParamDynamic = "dynamic";
static const std::map<::ge::DataType, aclDataType> kMsTypeToAclType = {
{::ge::DT_BOOL, ACL_BOOL}, {::ge::DT_INT8, ACL_INT8}, {::ge::DT_INT16, ACL_INT16},
@ -529,11 +530,34 @@ std::vector<GeTensorDescPtr> AclUtils::GetInputTensorDesc(const AnfNodePtr &anf_
return res;
}
std::set<std::string> AclUtils::GetUselessOutputs(const AnfNodePtr &node) {
  MS_EXCEPTION_IF_NULL(node);
  // Table of op name -> output anchor names that the ACL adapter can skip.
  static const std::map<std::string, std::set<std::string>> kMsUselessOutputs = {
    {prim::kPrimApplyMomentum->name(), {"accum"}}};
  const auto op_name = common::AnfAlgo::GetCNodeName(node);
  const auto iter = kMsUselessOutputs.find(op_name);
  // Ops without an entry have no skippable outputs.
  return iter == kMsUselessOutputs.end() ? std::set<std::string>{} : iter->second;
}
std::vector<GeTensorDescPtr> AclUtils::GetOutputTensorDesc(const AnfNodePtr &anf_node) {
MS_EXCEPTION_IF_NULL(anf_node);
size_t output_num = common::AnfAlgo::GetOutputTensorNum(anf_node);
std::vector<GeTensorDescPtr> res;
auto useless_outputs = GetUselessOutputs(anf_node);
auto out_anchor_names = GetOpOutputAnchorNames(anf_node);
for (size_t i = 0; i < output_num; ++i) {
if (out_anchor_names.size() <= i) {
MS_LOG(EXCEPTION) << "Index [" << i
<< "] exceed the size of all input names, node:" << anf_node->fullname_with_scope();
}
if (useless_outputs.count(out_anchor_names[i])) {
MS_LOG(INFO) << "For op: [" << anf_node->fullname_with_scope() << "],current out anchor name:["
<< out_anchor_names[i] << "] is useless, need skip.";
continue;
}
auto ori_shape = common::AnfAlgo::GetOutputInferShape(anf_node, i);
auto output_shape = AnfAlgo::GetOutputDeviceShape(anf_node, i);
auto output_type = AnfAlgo::GetOutputDeviceDataType(anf_node, i);
@ -545,5 +569,63 @@ std::vector<GeTensorDescPtr> AclUtils::GetOutputTensorDesc(const AnfNodePtr &anf
}
return res;
}
std::shared_ptr<OpInfo> AclUtils::GetKernelOpInfo(const AnfNodePtr &node) {
  MS_EXCEPTION_IF_NULL(node);
  // Look up the registered TBE op-info entry matching this cnode's name.
  // NOTE(review): may return nullptr when the op is not registered — callers must check.
  const auto op_name = common::AnfAlgo::GetCNodeName(node);
  return kernel::OpLib::FindOp(op_name, kernel::kImplyTBE);
}
// Build the ordered list of GE input anchor names for `node`.
// Dynamic inputs ("dynamic" param_type) are expanded to one anchor per real
// input, named "<name>_dynamic_<k>" where the count comes from the
// kAttrDynInputSizes attribute; all other inputs contribute their plain name.
std::vector<std::string> AclUtils::GetOpInputAnchorNames(const AnfNodePtr &node) {
  auto op_info_ptr = GetKernelOpInfo(node);
  MS_EXCEPTION_IF_NULL(op_info_ptr);
  auto inputs_ptr = op_info_ptr->inputs_ptr();
  auto primitive = common::AnfAlgo::GetCNodePrimitive(node);
  MS_EXCEPTION_IF_NULL(primitive);
  size_t dynamic_input_index = 0;
  std::vector<int64_t> dynamic_inputs_list;
  std::vector<std::string> input_names;
  if (primitive->GetAttr(kAttrDynInputSizes) != nullptr) {
    dynamic_inputs_list = GetValue<std::vector<int64_t>>(primitive->GetAttr(kAttrDynInputSizes));
  }
  for (const auto &item : inputs_ptr) {
    MS_EXCEPTION_IF_NULL(item);
    if (item->param_type() == kParamDynamic) {
      // Bug fix: was `>`; an index equal to size() would read past the end of
      // dynamic_inputs_list on the next line.
      if (dynamic_input_index >= dynamic_inputs_list.size()) {
        MS_LOG(EXCEPTION) << "Dynamic input index should be less than the dynamic input's size.";
      }
      auto real_inputs_num = dynamic_inputs_list[dynamic_input_index];
      for (int64_t k = 0; k < real_inputs_num; k++) {
        std::string input_name = item->name() + "_dynamic_" + std::to_string(k);
        (void)input_names.emplace_back(input_name);
      }
    } else {
      (void)input_names.emplace_back(item->name());
    }
    // The dyn-input-sizes attribute holds one entry per declared input, so the
    // cursor advances for every item, not only for dynamic ones.
    dynamic_input_index++;
  }
  return input_names;
}
// Build the ordered list of GE output anchor names for `node`.
// A single dynamic output is expanded to "<name>_dynamic_<i>" per real output;
// otherwise each declared output contributes its plain name.
std::vector<std::string> AclUtils::GetOpOutputAnchorNames(const AnfNodePtr &node) {
  auto op_info_ptr = GetKernelOpInfo(node);
  // Fix: GetKernelOpInfo may return nullptr (unregistered op); the input-side
  // sibling already checks this, the output side did not.
  MS_EXCEPTION_IF_NULL(op_info_ptr);
  auto outputs_ptr = op_info_ptr->outputs_ptr();
  std::vector<std::string> output_names;
  for (const auto &out_item : outputs_ptr) {
    MS_EXCEPTION_IF_NULL(out_item);
    // Expansion only applies when the dynamic output is the op's sole output;
    // the real count then comes from the node's inferred output number.
    if (out_item->param_type() == kParamDynamic && outputs_ptr.size() == 1) {
      std::string output_name;
      auto real_outputs_size = common::AnfAlgo::GetOutputTensorNum(node);
      for (size_t i = 0; i < real_outputs_size; i++) {
        output_name = out_item->name() + "_dynamic_" + std::to_string(i);
        (void)output_names.emplace_back(output_name);
      }
    } else {
      (void)output_names.emplace_back(out_item->name());
    }
  }
  return output_names;
}
} // namespace kernel
} // namespace mindspore

View File

@ -17,6 +17,7 @@
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_ACL_ACL_UTILS_H_
#include <memory>
#include <string>
#include <set>
#include <vector>
#include <map>
#include "kernel/kernel.h"
@ -24,6 +25,7 @@
#include "acl/acl_op_compiler.h"
#include "acl/acl_base.h"
#include "transform/graph_ir/convert.h"
#include "kernel/oplib/oplib.h"
namespace mindspore {
namespace kernel {
@ -104,6 +106,10 @@ class AclUtils {
static std::vector<GeTensorDescPtr> GetInputTensorDesc(const AnfNodePtr &anf_node);
static std::vector<GeTensorDescPtr> GetOutputTensorDesc(const AnfNodePtr &anf_node);
static std::shared_ptr<OpInfo> GetKernelOpInfo(const AnfNodePtr &node);
static std::vector<std::string> GetOpInputAnchorNames(const AnfNodePtr &node);
static std::vector<std::string> GetOpOutputAnchorNames(const AnfNodePtr &node);
static std::set<std::string> GetUselessOutputs(const AnfNodePtr &node);
};
} // namespace kernel
} // namespace mindspore

View File

@ -31,6 +31,7 @@
#include "graph/utils/op_desc_utils.h"
#include "plugin/device/ascend/kernel/ascend_kernel_mod.h"
#include "graph/utils/tensor_utils.h"
#include "plugin/device/ascend/kernel/acl/acl_kernel_utils.h"
namespace mindspore {
namespace device {
@ -410,48 +411,8 @@ void OpTilingCalculateAdapter::AddEdge(const ::ge::NodePtr &ge_node,
// Resolve the GE input/output anchor names for `node` into input_names_ /
// output_names_ by delegating to the shared ACL helpers.
void OpTilingCalculateAdapter::InitOpIoName(const CNodePtr &node) {
  MS_EXCEPTION_IF_NULL(node);
  MS_LOG(DEBUG) << "Get the every input name of " << op_name_;
  // The previous hand-rolled name computation here was dead code: its results
  // were unconditionally overwritten by these two assignments, so it has been
  // removed in favor of the single shared implementation in AclUtils.
  input_names_ = kernel::AclUtils::GetOpInputAnchorNames(node);
  output_names_ = kernel::AclUtils::GetOpOutputAnchorNames(node);
}
::ge::NodePtr OpTilingCalculateAdapter::CreateGeNode(const CNodePtr &node, ::ge::ComputeGraphPtr *ge_graph,

View File

@ -2926,5 +2926,15 @@ std::map<std::string, unsigned int> GeOpConvertor::GetNeedAddInput(const AnfNode
return adpt->getAttrInputMap();
}
bool GeOpConvertor::IsDynamicInput(const AnfNodePtr &node, const size_t idx) {
MS_EXCEPTION_IF_NULL(node);
OpAdapterPtr adapterPtr = FindAdapter(node, true);
if (adapterPtr == nullptr) {
MS_LOG(INFO) << "Can't find a adapter for op:" << node->DebugString();
return false;
}
return adapterPtr->IsDynInputOp(idx);
}
} // namespace transform
} // namespace mindspore

View File

@ -67,6 +67,7 @@ class GeOpConvertor {
static mindspore::HashSet<size_t> GetNeedRemoveInput(const AnfNodePtr &node, const bool training);
static std::map<std::string, unsigned int> GetNeedAddInput(const AnfNodePtr &node, const bool training);
static bool IsDynamicInput(const AnfNodePtr &node, const size_t idx);
};
class DfGraphConvertor {

View File

@ -113,4 +113,13 @@ INPUT_MAP(IsNan) = {{1, INPUT_DESC(x)}};
ATTR_MAP(IsNan) = EMPTY_ATTR_MAP;
OUTPUT_MAP(IsNan) = {{0, OUTPUT_DESC(y)}};
REG_ADPT_DESC(IsNan, kNameIsNan, ADPT_DESC(IsNan))
// LpNorm
// GE adapter registration: maps the MindSpore LpNorm primitive onto the GE
// LpNorm operator. Note the attribute renames across frameworks:
// MS "axis" -> GE "axes", MS "keep_dims" -> GE "keepdim".
INPUT_MAP(LpNorm) = {{1, INPUT_DESC(x)}};
ATTR_MAP(LpNorm) = {{"p", ATTR_DESC(p, AnyTraits<int64_t>())},
{"axis", ATTR_DESC(axes, AnyTraits<std::vector<int64_t>>())},
{"keep_dims", ATTR_DESC(keepdim, AnyTraits<bool>())},
{"epsilon", ATTR_DESC(epsilon, AnyTraits<float>())}};
OUTPUT_MAP(LpNorm) = {{0, OUTPUT_DESC(y)}};
REG_ADPT_DESC(LpNorm, prim::kPrimLpNorm->name(), ADPT_DESC(LpNorm))
} // namespace mindspore::transform

View File

@ -60,5 +60,8 @@ DECLARE_OP_USE_OUTPUT(IsFinite)
DECLARE_OP_ADAPTER(IsNan)
DECLARE_OP_USE_OUTPUT(IsNan)
DECLARE_OP_ADAPTER(LpNorm)
DECLARE_OP_USE_OUTPUT(LpNorm)
} // namespace mindspore::transform
#endif // MINDSPORE_CCSRC_TRANSFORM_GRAPH_IR_OP_DECLARE_MATH_OPS_DECLARE_H_