op select and check info print specify

This commit is contained in:
lby 2021-09-22 17:39:29 +08:00
parent a17915b2c4
commit 721a22e167
6 changed files with 62 additions and 49 deletions

View File

@ -264,7 +264,7 @@ def check_support(job: TbeJob):
job.result = "NOT_SUPPORTED"
job.info("op module {} check support result is {}, not supported".format(op_module_name, result_str))
if reason:
job.info("Unsupported reason is {}".format(reason))
job.warning("Unsupported reason is {}".format(reason))
return True

View File

@ -33,6 +33,9 @@ namespace {
void FilterInvalidKernelInfo(const CNodePtr &kernel_node,
std::vector<std::shared_ptr<kernel::KernelBuildInfo>> *kernel_info_list) {
MS_EXCEPTION_IF_NULL(kernel_info_list);
if (kernel_info_list->empty()) {
return;
}
MS_EXCEPTION_IF_NULL(kernel_node);
size_t output_tensor_num = AnfAlgo::GetOutputTensorNum(kernel_node);
size_t input_tensor_num = AnfAlgo::GetInputTensorNum(kernel_node);
@ -67,35 +70,30 @@ void FilterInvalidKernelInfo(const CNodePtr &kernel_node,
}
} // namespace
// Emits a log line when the given kernel info list contains no entries.
//   kernel_info_list: list to inspect; passed through MS_EXCEPTION_IF_NULL before use.
//   type: label of the kernel category, appended to the log text.
// Has no return value and does not modify the list.
void CheckKernelInfoListEmpty(const std::vector<std::shared_ptr<kernel::KernelBuildInfo>> *kernel_info_list,
const std::string &type) {
MS_EXCEPTION_IF_NULL(kernel_info_list);
if (kernel_info_list->empty()) {
// The text starts with "Warning:" but the message is logged at INFO level.
MS_LOG(INFO) << "Warning: kernel info list is empty, kernel type: " << type;
}
}
// Populates kernel_info_list for kernel_node by querying kernel sources one after another.
// Each later source is only queried while the list is still empty, in this order:
// TbeMetadataInfo -> AicpuMetadataInfo -> GetRtKelInfo -> HcclMetadataInfo -> HostMetadataInfo.
//   kernel_node: node to query for; passed through MS_EXCEPTION_IF_NULL.
//   kernel_info_list: output list; passed through MS_EXCEPTION_IF_NULL.
// NOTE(review): this listing looks like a rendered diff with the +/- markers stripped, so
// removed and added lines may both be present here (the CheckKernelInfoListEmpty calls and
// the MS_EXCEPTION block) - confirm against the real file before relying on the exact flow.
void KernelQueryAll(const CNodePtr &kernel_node,
std::vector<std::shared_ptr<kernel::KernelBuildInfo>> *kernel_info_list) {
MS_EXCEPTION_IF_NULL(kernel_node);
MS_EXCEPTION_IF_NULL(kernel_info_list);
std::string op_name = AnfAlgo::GetCNodeName(kernel_node);
// First choice: TBE kernel info.
TbeMetadataInfo(kernel_node, kernel_info_list);
if (kernel_info_list->empty()) {
// TBE produced nothing; try AICPU.
AicpuMetadataInfo(kernel_node, kernel_info_list);
if (!kernel_info_list->empty()) {
MS_LOG(INFO) << "The node [" << kernel_node->DebugString()
<< "] cannot find valid TBE kernel info, try to get aicpu kernel info";
// Tag the node so that it carries the kAttrIsAICPUKernel attribute set to true.
AnfAlgo::SetNodeAttr(kAttrIsAICPUKernel, MakeValue(true), kernel_node);
}
}
if (kernel_info_list->empty()) {
GetRtKelInfo(kernel_node, kernel_info_list);
CheckKernelInfoListEmpty(kernel_info_list, "RT_Kernel");
}
if (kernel_info_list->empty()) {
HcclMetadataInfo(kernel_node, kernel_info_list);
CheckKernelInfoListEmpty(kernel_info_list, "HCCL_Kernel");
}
if (kernel_info_list->empty()) {
HostMetadataInfo(kernel_node, kernel_info_list);
}
// Every source came back empty: report the operator name, scope name, debug string and source trace.
if (kernel_info_list->empty()) {
MS_EXCEPTION(NotExistsError) << "Can not find any available operator info for op [" << op_name << ", "
<< kernel_node->fullname_with_scope()
<< "]. Node DebugString:" << kernel_node->DebugString()
<< ", maybe the operator can not supported on current platform. \n trace "
<< trace::DumpSourceLines(kernel_node);
// NOTE(review): presumably unreachable if MS_EXCEPTION throws; likely a leftover removed diff line - verify.
CheckKernelInfoListEmpty(kernel_info_list, "HOST_Kernel");
}
}
@ -125,14 +123,6 @@ void KernelQuery(const CNodePtr &kernel_node, std::vector<std::shared_ptr<kernel
KernelQueryAll(kernel_node, kernel_info_list);
break;
}
if (kernel_info_list->empty()) {
MS_EXCEPTION(NotExistsError) << "Can not find any available operator info for operator ["
<< AnfAlgo::GetCNodeName(kernel_node) << ", " << kernel_node->fullname_with_scope()
<< "]. Node DebugString:" << kernel_node->DebugString()
<< ", maybe the operator can not supported on current platform. \n trace "
<< trace::DumpSourceLines(kernel_node);
}
// check output
FilterInvalidKernelInfo(kernel_node, kernel_info_list);
}

View File

@ -144,13 +144,24 @@ void PrintInfo(const nlohmann::json &info, const std::string &job_name, const in
}
// Scans the kMessage field of every log entry for exception-related markers
// ("except_msg", "except_tuple_msg", "Error message").
//   all_logs: JSON log entries to scan.
// Returns a std::string built from the matching messages, or "None".
// NOTE(review): this listing looks like a rendered diff with the +/- markers stripped. As written
// it contains both an early `return message;` / `return "None";` pair and buffer-collecting code
// placed after those returns, so part of the body is unreachable in this form - confirm against
// the real file which of the two variants is current.
std::string FilterExceptionMessage(const std::vector<nlohmann::json> &all_logs) {
// Collects matching messages, one per line.
std::ostringstream buffer;
for (const auto &item : all_logs) {
auto message = GetJsonValue<std::string>(item, kMessage);
if (message.find("except_msg") != std::string::npos) {
return message;
// NOTE(review): the two appends below follow a return statement in this listing.
buffer << message;
buffer << "\n";
}
if (message.find("except_tuple_msg") != std::string::npos) {
buffer << message;
buffer << "\n";
}
if (message.find("Error message") != std::string::npos) {
buffer << message;
buffer << "\n";
}
}
return "None";
// NOTE(review): in this listing the fallback below follows `return "None";` above.
// It yields "None" when nothing was collected, otherwise the collected text.
auto res = buffer.str().empty() ? "None" : buffer.str();
return res;
}
bool IsDigit(const std::string &str) {
@ -273,11 +284,9 @@ void AscendKernelCompileManager::ResetOldTask() {
}
void AscendKernelCompileManager::PrintProcessLog(const nlohmann::json &json, int adjust_log_level = EXCEPTION) {
auto logs = GetJsonValue<std::vector<nlohmann::json>>(json, kProcessInfo);
auto all_logs = GetJsonValue<std::vector<nlohmann::json>>(json, kProcessInfo);
auto job_id = GetJsonValue<int>(json, kJobId);
auto json_name = GetJsonValue<std::string>(json, kFusionOpName);
std::vector<nlohmann::json> all_logs;
(void)std::copy(logs.begin(), logs.end(), std::back_inserter(all_logs));
std::sort(all_logs.begin(), all_logs.end(), Order);
for (const auto &item : all_logs) {
PrintInfo(item, json_name, job_id, adjust_log_level);
@ -774,6 +783,9 @@ std::string AscendKernelCompileManager::OpSelectAndCheckResultProcess(const nloh
}
}
auto res = GetJsonValue<std::string>(json, kResult);
if (job_type == kCheckSupport && res != kFullySupported) {
PrintProcessLog(json, WARNING);
}
MS_LOG(INFO) << "Job:" << job_type << " running success, " << json_name << ", get: " << res;
return res;
}
@ -798,15 +810,15 @@ std::string AscendKernelCompileManager::AscendOpSelectFormat(const AnfNodePtr &n
bool AscendKernelCompileManager::AscendOpCheckSupported(const AnfNodePtr &node) {
MS_EXCEPTION_IF_NULL(node);
auto op_name = AnfAlgo::GetCNodeName(node);
MS_LOG(INFO) << "Check supported for op [" << op_name << ", " << node->fullname_with_scope() << "]";
auto full_name = node->fullname_with_scope();
MS_LOG(INFO) << "Check supported for op [" << full_name << "]";
MS_EXCEPTION_IF_NULL(build_manager_);
auto json_creator = std::make_shared<CheckTbeJsonCreator>();
MS_EXCEPTION_IF_NULL(json_creator);
nlohmann::json kernel_info;
nlohmann::json check_json;
if (!json_creator->GenJson(node, &kernel_info)) {
MS_LOG(EXCEPTION) << "Gen check supported json failed.[" << op_name << ", " << node->fullname_with_scope()
MS_LOG(EXCEPTION) << "Gen check supported json failed.[" << full_name
<< "], node trace: " << trace::DumpSourceLines(node);
}
JsonAssemble(kCheckSupport, kernel_info, &check_json);

View File

@ -58,13 +58,14 @@ void TbeKernelSelect::TbeMetadataInfoEx() {
MS_EXCEPTION_IF_NULL(cnode_ptr_);
MS_EXCEPTION_IF_NULL(kernel_info_list_);
node_name_ = AnfAlgo::GetCNodeName(cnode_ptr_);
full_name_ = cnode_ptr_->fullname_with_scope();
auto op_info_ptr = tbe::TbeDynamicShapeUtil::FindOp(node_name_, cnode_ptr_);
if (!op_info_ptr) {
return;
}
if (!TbePropertyChecker::CheckTbeProperties(cnode_ptr_)) {
MS_LOG(INFO) << "Warning: node(" << cnode_ptr_->fullname_with_scope() << ") not support tbe aicore.";
MS_LOG(INFO) << "Warning: node(" << full_name_ << ") is not supported by tbe ai_core.";
return;
}
@ -189,7 +190,7 @@ void TbeKernelSelect::GetReducePatternKernelInfo(const OpInfo &op_info) {
void TbeKernelSelect::FilterInVaildKernelInfo(const OpInfo &op_info) {
if (kernel_info_list_->empty()) {
MS_LOG(INFO) << "Warning: get kernel build info failed.";
MS_LOG(INFO) << "Warning: get kernel build info failed. Op name: " << full_name_;
return;
}
std::vector<std::shared_ptr<KernelBuildInfo>> kernel_info_list;
@ -205,6 +206,11 @@ void TbeKernelSelect::FilterInVaildKernelInfo(const OpInfo &op_info) {
}
kernel_info_list.emplace_back(*iter);
}
if (kernel_info_list.empty()) {
MS_LOG(WARNING) << "Tbe kernel info list is empty, all valid kernel info was filtered out. "
"Check the input shape, attrs or other value of node : "
<< full_name_;
}
(*kernel_info_list_) = kernel_info_list;
}

View File

@ -71,6 +71,7 @@ class TbeKernelSelect {
CNodePtr cnode_ptr_;
std::vector<std::shared_ptr<KernelBuildInfo>> *kernel_info_list_;
std::string node_name_;
std::string full_name_;
};
} // namespace kernel
} // namespace mindspore

View File

@ -449,6 +449,9 @@ KernelSelectStatus SetMatchedKernelInfo(const CNodePtr &kernel_node,
const std::vector<std::shared_ptr<kernel::KernelBuildInfo>> &kernel_info_list) {
MS_EXCEPTION_IF_NULL(kernel_node);
KernelSelectStatus select_status = kNoMatched;
if (kernel_info_list.empty()) {
return select_status;
}
bool precision_reduce = false;
std::shared_ptr<kernel::KernelBuildInfo> selected_kernel_info = nullptr;
// Matched kernel info
@ -469,8 +472,9 @@ KernelSelectStatus SetMatchedKernelInfo(const CNodePtr &kernel_node,
select_status = precision_reduce ? kStatusReducePrecision : kStatusRaisePrecision;
}
}
// Set kernel info to the anfnode
MS_LOG(INFO) << "Current node: " << kernel_node->DebugString() << " selected: " << selected_kernel_info->ToString();
// Set kernel build info to node
MS_LOG(INFO) << "Current node: " << kernel_node->fullname_with_scope()
<< " selected: " << selected_kernel_info->ToString();
AnfAlgo::SetSelectKernelBuildInfo(selected_kernel_info, kernel_node.get());
// Set format and data type for input tensor.
if (AnfAlgo::HasNodeAttr(kAttrPynativeNextOpName, kernel_node)) {
@ -492,24 +496,24 @@ KernelSelectStatus SelectKernelInfo(const CNodePtr &kernel_node, KernelType kern
}
kernel::KernelQuery(kernel_node, &kernel_info_list, kernel_type);
auto select_status = SetMatchedKernelInfo(kernel_node, kernel_info_list);
// If aicore not find valid kernel info reloading aicpu kernel info list to find it
// If it cannot find valid ai_core kernel info, search again in the ai_cpu kernel info
if (select_status == kNoMatched) {
MS_LOG(WARNING) << "The node [" << kernel_node->DebugString()
<< "] cannot find valid TBE kernel info, try to get aicpu kernel info";
MS_LOG(WARNING) << "The node [" << kernel_node->fullname_with_scope()
<< "] cannot find valid TBE kernel info, try to get ai_cpu kernel info";
kernel::AICPUQuery(kernel_node, &aicpu_kernel_info_list);
select_status = SetMatchedKernelInfo(kernel_node, aicpu_kernel_info_list);
AnfAlgo::SetNodeAttr(kAttrIsAICPUKernel, MakeValue(true), kernel_node);
}
// The kernel info not finded both in the aicpu kernel list & aicore kernel list
// The kernel info cannot be found in either the ai_cpu or the ai_core kernel lists
if (select_status == kNoMatched) {
std::ostringstream buffer;
PrintInputAndOutputInferType(buffer, kernel_node);
MS_LOG(WARNING) << ">>> Candidates supported kernel info(input and output data type) list:";
MS_LOG(WARNING) << ">>> The supported kernel info(input and output data type) candidates list:";
for (size_t index = 0; index < kernel_info_list.size(); ++index) {
MS_LOG(WARNING) << "Kernel info [" << index << "] :" << kernel_info_list[index]->ToString();
MS_LOG(WARNING) << "Ai_core kernel info [" << index << "] :" << kernel_info_list[index]->ToString();
}
for (size_t index = 0; index < aicpu_kernel_info_list.size(); ++index) {
MS_LOG(WARNING) << "Kernel info [" << (kernel_info_list.size() + index)
MS_LOG(WARNING) << "Ai_cpu kernel info [" << (kernel_info_list.size() + index)
<< "] :" << aicpu_kernel_info_list[index]->ToString();
}
if (IsPrimitiveCNode(kernel_node, prim::kPrimLabelSwitch)) {
@ -519,11 +523,11 @@ KernelSelectStatus SelectKernelInfo(const CNodePtr &kernel_node, KernelType kern
SetTensorDeviceInfo(kernel_node);
} else {
MS_LOG(WARNING) << " <<<";
MS_EXCEPTION(TypeError)
<< "The operator [" << AnfAlgo::GetCNodeName(kernel_node)
<< "] cannot find valid kernel info(input and output data type), not supported the data type: " << buffer.str()
<< ", please refer to the supported data types in candidates kernel info list."
<< " trace: " << trace::DumpSourceLines(kernel_node) << ", Node DebugString: " << kernel_node->DebugString();
MS_LOG(EXCEPTION) << "Can not find any available operator info for operator ["
<< kernel_node->fullname_with_scope()
<< "]. Maybe don't supported the data type: " << buffer.str()
<< ", or maybe the operator can not supported on current platform.\n Node trace: "
<< trace::DumpSourceLines(kernel_node);
}
}
return select_status;