op select and check info print specify
This commit is contained in:
parent
a17915b2c4
commit
721a22e167
|
@ -264,7 +264,7 @@ def check_support(job: TbeJob):
|
|||
job.result = "NOT_SUPPORTED"
|
||||
job.info("op module {} check support result is {}, not supported".format(op_module_name, result_str))
|
||||
if reason:
|
||||
job.info("Unsupported reason is {}".format(reason))
|
||||
job.warning("Unsupported reason is {}".format(reason))
|
||||
return True
|
||||
|
||||
|
||||
|
|
|
@ -33,6 +33,9 @@ namespace {
|
|||
void FilterInvalidKernelInfo(const CNodePtr &kernel_node,
|
||||
std::vector<std::shared_ptr<kernel::KernelBuildInfo>> *kernel_info_list) {
|
||||
MS_EXCEPTION_IF_NULL(kernel_info_list);
|
||||
if (kernel_info_list->empty()) {
|
||||
return;
|
||||
}
|
||||
MS_EXCEPTION_IF_NULL(kernel_node);
|
||||
size_t output_tensor_num = AnfAlgo::GetOutputTensorNum(kernel_node);
|
||||
size_t input_tensor_num = AnfAlgo::GetInputTensorNum(kernel_node);
|
||||
|
@ -67,35 +70,30 @@ void FilterInvalidKernelInfo(const CNodePtr &kernel_node,
|
|||
}
|
||||
} // namespace
|
||||
|
||||
// Reports an empty kernel info list for the given kernel type.
// kernel_info_list is checked with MS_EXCEPTION_IF_NULL before it is dereferenced.
// When the list is empty, an INFO-level log naming `type` is emitted; otherwise nothing happens.
// The list itself is never modified (it is taken as a pointer to const).
void CheckKernelInfoListEmpty(const std::vector<std::shared_ptr<kernel::KernelBuildInfo>> *kernel_info_list,
|
||||
const std::string &type) {
|
||||
MS_EXCEPTION_IF_NULL(kernel_info_list);
|
||||
if (kernel_info_list->empty()) {
|
||||
MS_LOG(INFO) << "Warning: kernel info list is empty, kernel type: " << type;
|
||||
}
|
||||
}
|
||||
|
||||
void KernelQueryAll(const CNodePtr &kernel_node,
|
||||
std::vector<std::shared_ptr<kernel::KernelBuildInfo>> *kernel_info_list) {
|
||||
MS_EXCEPTION_IF_NULL(kernel_node);
|
||||
MS_EXCEPTION_IF_NULL(kernel_info_list);
|
||||
std::string op_name = AnfAlgo::GetCNodeName(kernel_node);
|
||||
TbeMetadataInfo(kernel_node, kernel_info_list);
|
||||
if (kernel_info_list->empty()) {
|
||||
AicpuMetadataInfo(kernel_node, kernel_info_list);
|
||||
if (!kernel_info_list->empty()) {
|
||||
MS_LOG(INFO) << "The node [" << kernel_node->DebugString()
|
||||
<< "] cannot find valid TBE kernel info, try to get aicpu kernel info";
|
||||
AnfAlgo::SetNodeAttr(kAttrIsAICPUKernel, MakeValue(true), kernel_node);
|
||||
}
|
||||
}
|
||||
if (kernel_info_list->empty()) {
|
||||
GetRtKelInfo(kernel_node, kernel_info_list);
|
||||
CheckKernelInfoListEmpty(kernel_info_list, "RT_Kernel");
|
||||
}
|
||||
if (kernel_info_list->empty()) {
|
||||
HcclMetadataInfo(kernel_node, kernel_info_list);
|
||||
CheckKernelInfoListEmpty(kernel_info_list, "HCCL_Kernel");
|
||||
}
|
||||
if (kernel_info_list->empty()) {
|
||||
HostMetadataInfo(kernel_node, kernel_info_list);
|
||||
}
|
||||
if (kernel_info_list->empty()) {
|
||||
MS_EXCEPTION(NotExistsError) << "Can not find any available operator info for op [" << op_name << ", "
|
||||
<< kernel_node->fullname_with_scope()
|
||||
<< "]. Node DebugString:" << kernel_node->DebugString()
|
||||
<< ", maybe the operator can not supported on current platform. \n trace "
|
||||
<< trace::DumpSourceLines(kernel_node);
|
||||
CheckKernelInfoListEmpty(kernel_info_list, "HOST_Kernel");
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -125,14 +123,6 @@ void KernelQuery(const CNodePtr &kernel_node, std::vector<std::shared_ptr<kernel
|
|||
KernelQueryAll(kernel_node, kernel_info_list);
|
||||
break;
|
||||
}
|
||||
|
||||
if (kernel_info_list->empty()) {
|
||||
MS_EXCEPTION(NotExistsError) << "Can not find any available operator info for operator ["
|
||||
<< AnfAlgo::GetCNodeName(kernel_node) << ", " << kernel_node->fullname_with_scope()
|
||||
<< "]. Node DebugString:" << kernel_node->DebugString()
|
||||
<< ", maybe the operator can not supported on current platform. \n trace "
|
||||
<< trace::DumpSourceLines(kernel_node);
|
||||
}
|
||||
// check output
|
||||
FilterInvalidKernelInfo(kernel_node, kernel_info_list);
|
||||
}
|
||||
|
|
|
@ -144,13 +144,24 @@ void PrintInfo(const nlohmann::json &info, const std::string &job_name, const in
|
|||
}
|
||||
|
||||
std::string FilterExceptionMessage(const std::vector<nlohmann::json> &all_logs) {
|
||||
std::ostringstream buffer;
|
||||
for (const auto &item : all_logs) {
|
||||
auto message = GetJsonValue<std::string>(item, kMessage);
|
||||
if (message.find("except_msg") != std::string::npos) {
|
||||
return message;
|
||||
buffer << message;
|
||||
buffer << "\n";
|
||||
}
|
||||
if (message.find("except_tuple_msg") != std::string::npos) {
|
||||
buffer << message;
|
||||
buffer << "\n";
|
||||
}
|
||||
if (message.find("Error message") != std::string::npos) {
|
||||
buffer << message;
|
||||
buffer << "\n";
|
||||
}
|
||||
}
|
||||
return "None";
|
||||
auto res = buffer.str().empty() ? "None" : buffer.str();
|
||||
return res;
|
||||
}
|
||||
|
||||
bool IsDigit(const std::string &str) {
|
||||
|
@ -273,11 +284,9 @@ void AscendKernelCompileManager::ResetOldTask() {
|
|||
}
|
||||
|
||||
void AscendKernelCompileManager::PrintProcessLog(const nlohmann::json &json, int adjust_log_level = EXCEPTION) {
|
||||
auto logs = GetJsonValue<std::vector<nlohmann::json>>(json, kProcessInfo);
|
||||
auto all_logs = GetJsonValue<std::vector<nlohmann::json>>(json, kProcessInfo);
|
||||
auto job_id = GetJsonValue<int>(json, kJobId);
|
||||
auto json_name = GetJsonValue<std::string>(json, kFusionOpName);
|
||||
std::vector<nlohmann::json> all_logs;
|
||||
(void)std::copy(logs.begin(), logs.end(), std::back_inserter(all_logs));
|
||||
std::sort(all_logs.begin(), all_logs.end(), Order);
|
||||
for (const auto &item : all_logs) {
|
||||
PrintInfo(item, json_name, job_id, adjust_log_level);
|
||||
|
@ -774,6 +783,9 @@ std::string AscendKernelCompileManager::OpSelectAndCheckResultProcess(const nloh
|
|||
}
|
||||
}
|
||||
auto res = GetJsonValue<std::string>(json, kResult);
|
||||
if (job_type == kCheckSupport && res != kFullySupported) {
|
||||
PrintProcessLog(json, WARNING);
|
||||
}
|
||||
MS_LOG(INFO) << "Job:" << job_type << " running success, " << json_name << ", get: " << res;
|
||||
return res;
|
||||
}
|
||||
|
@ -798,15 +810,15 @@ std::string AscendKernelCompileManager::AscendOpSelectFormat(const AnfNodePtr &n
|
|||
|
||||
bool AscendKernelCompileManager::AscendOpCheckSupported(const AnfNodePtr &node) {
|
||||
MS_EXCEPTION_IF_NULL(node);
|
||||
auto op_name = AnfAlgo::GetCNodeName(node);
|
||||
MS_LOG(INFO) << "Check supported for op [" << op_name << ", " << node->fullname_with_scope() << "]";
|
||||
auto full_name = node->fullname_with_scope();
|
||||
MS_LOG(INFO) << "Check supported for op [" << full_name << "]";
|
||||
MS_EXCEPTION_IF_NULL(build_manager_);
|
||||
auto json_creator = std::make_shared<CheckTbeJsonCreator>();
|
||||
MS_EXCEPTION_IF_NULL(json_creator);
|
||||
nlohmann::json kernel_info;
|
||||
nlohmann::json check_json;
|
||||
if (!json_creator->GenJson(node, &kernel_info)) {
|
||||
MS_LOG(EXCEPTION) << "Gen check supported json failed.[" << op_name << ", " << node->fullname_with_scope()
|
||||
MS_LOG(EXCEPTION) << "Gen check supported json failed.[" << full_name
|
||||
<< "], node trace: " << trace::DumpSourceLines(node);
|
||||
}
|
||||
JsonAssemble(kCheckSupport, kernel_info, &check_json);
|
||||
|
|
|
@ -58,13 +58,14 @@ void TbeKernelSelect::TbeMetadataInfoEx() {
|
|||
MS_EXCEPTION_IF_NULL(cnode_ptr_);
|
||||
MS_EXCEPTION_IF_NULL(kernel_info_list_);
|
||||
node_name_ = AnfAlgo::GetCNodeName(cnode_ptr_);
|
||||
full_name_ = cnode_ptr_->fullname_with_scope();
|
||||
|
||||
auto op_info_ptr = tbe::TbeDynamicShapeUtil::FindOp(node_name_, cnode_ptr_);
|
||||
if (!op_info_ptr) {
|
||||
return;
|
||||
}
|
||||
if (!TbePropertyChecker::CheckTbeProperties(cnode_ptr_)) {
|
||||
MS_LOG(INFO) << "Warning: node(" << cnode_ptr_->fullname_with_scope() << ") not support tbe aicore.";
|
||||
MS_LOG(INFO) << "Warning: node(" << full_name_ << ") is not supported by tbe ai_core.";
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -189,7 +190,7 @@ void TbeKernelSelect::GetReducePatternKernelInfo(const OpInfo &op_info) {
|
|||
|
||||
void TbeKernelSelect::FilterInVaildKernelInfo(const OpInfo &op_info) {
|
||||
if (kernel_info_list_->empty()) {
|
||||
MS_LOG(INFO) << "Warning: get kernel build info failed.";
|
||||
MS_LOG(INFO) << "Warning: get kernel build info failed. Op name: " << full_name_;
|
||||
return;
|
||||
}
|
||||
std::vector<std::shared_ptr<KernelBuildInfo>> kernel_info_list;
|
||||
|
@ -205,6 +206,11 @@ void TbeKernelSelect::FilterInVaildKernelInfo(const OpInfo &op_info) {
|
|||
}
|
||||
kernel_info_list.emplace_back(*iter);
|
||||
}
|
||||
if (kernel_info_list.empty()) {
|
||||
MS_LOG(WARNING) << "Tbe kernel info list is empty, all valid kernel info was filtered out. "
|
||||
"Check the input shape, attrs or other value of node : "
|
||||
<< full_name_;
|
||||
}
|
||||
(*kernel_info_list_) = kernel_info_list;
|
||||
}
|
||||
|
||||
|
|
|
@ -71,6 +71,7 @@ class TbeKernelSelect {
|
|||
CNodePtr cnode_ptr_;
|
||||
std::vector<std::shared_ptr<KernelBuildInfo>> *kernel_info_list_;
|
||||
std::string node_name_;
|
||||
std::string full_name_;
|
||||
};
|
||||
} // namespace kernel
|
||||
} // namespace mindspore
|
||||
|
|
|
@ -449,6 +449,9 @@ KernelSelectStatus SetMatchedKernelInfo(const CNodePtr &kernel_node,
|
|||
const std::vector<std::shared_ptr<kernel::KernelBuildInfo>> &kernel_info_list) {
|
||||
MS_EXCEPTION_IF_NULL(kernel_node);
|
||||
KernelSelectStatus select_status = kNoMatched;
|
||||
if (kernel_info_list.empty()) {
|
||||
return select_status;
|
||||
}
|
||||
bool precision_reduce = false;
|
||||
std::shared_ptr<kernel::KernelBuildInfo> selected_kernel_info = nullptr;
|
||||
// Matched kernel info
|
||||
|
@ -469,8 +472,9 @@ KernelSelectStatus SetMatchedKernelInfo(const CNodePtr &kernel_node,
|
|||
select_status = precision_reduce ? kStatusReducePrecision : kStatusRaisePrecision;
|
||||
}
|
||||
}
|
||||
// Set kernel info to the anfnode
|
||||
MS_LOG(INFO) << "Current node: " << kernel_node->DebugString() << " selected: " << selected_kernel_info->ToString();
|
||||
// Set kernel build info to node
|
||||
MS_LOG(INFO) << "Current node: " << kernel_node->fullname_with_scope()
|
||||
<< " selected: " << selected_kernel_info->ToString();
|
||||
AnfAlgo::SetSelectKernelBuildInfo(selected_kernel_info, kernel_node.get());
|
||||
// Set format and data type for input tensor.
|
||||
if (AnfAlgo::HasNodeAttr(kAttrPynativeNextOpName, kernel_node)) {
|
||||
|
@ -492,24 +496,24 @@ KernelSelectStatus SelectKernelInfo(const CNodePtr &kernel_node, KernelType kern
|
|||
}
|
||||
kernel::KernelQuery(kernel_node, &kernel_info_list, kernel_type);
|
||||
auto select_status = SetMatchedKernelInfo(kernel_node, kernel_info_list);
|
||||
// If aicore not find valid kernel info reloading aicpu kernel info list to find it
|
||||
// If it cannot find valid ai_core kernel info, re-find in ai_cpu kernel info
|
||||
if (select_status == kNoMatched) {
|
||||
MS_LOG(WARNING) << "The node [" << kernel_node->DebugString()
|
||||
<< "] cannot find valid TBE kernel info, try to get aicpu kernel info";
|
||||
MS_LOG(WARNING) << "The node [" << kernel_node->fullname_with_scope()
|
||||
<< "] cannot find valid TBE kernel info, try to get ai_cpu kernel info";
|
||||
kernel::AICPUQuery(kernel_node, &aicpu_kernel_info_list);
|
||||
select_status = SetMatchedKernelInfo(kernel_node, aicpu_kernel_info_list);
|
||||
AnfAlgo::SetNodeAttr(kAttrIsAICPUKernel, MakeValue(true), kernel_node);
|
||||
}
|
||||
// The kernel info not finded both in the aicpu kernel list & aicore kernel list
|
||||
// The kernel info cannot be found in either the ai_cpu or the ai_core kernel lists
|
||||
if (select_status == kNoMatched) {
|
||||
std::ostringstream buffer;
|
||||
PrintInputAndOutputInferType(buffer, kernel_node);
|
||||
MS_LOG(WARNING) << ">>> Candidates supported kernel info(input and output data type) list:";
|
||||
MS_LOG(WARNING) << ">>> The supported kernel info(input and output data type) candidates list:";
|
||||
for (size_t index = 0; index < kernel_info_list.size(); ++index) {
|
||||
MS_LOG(WARNING) << "Kernel info [" << index << "] :" << kernel_info_list[index]->ToString();
|
||||
MS_LOG(WARNING) << "Ai_core kernel info [" << index << "] :" << kernel_info_list[index]->ToString();
|
||||
}
|
||||
for (size_t index = 0; index < aicpu_kernel_info_list.size(); ++index) {
|
||||
MS_LOG(WARNING) << "Kernel info [" << (kernel_info_list.size() + index)
|
||||
MS_LOG(WARNING) << "Ai_cpu kernel info [" << (kernel_info_list.size() + index)
|
||||
<< "] :" << aicpu_kernel_info_list[index]->ToString();
|
||||
}
|
||||
if (IsPrimitiveCNode(kernel_node, prim::kPrimLabelSwitch)) {
|
||||
|
@ -519,11 +523,11 @@ KernelSelectStatus SelectKernelInfo(const CNodePtr &kernel_node, KernelType kern
|
|||
SetTensorDeviceInfo(kernel_node);
|
||||
} else {
|
||||
MS_LOG(WARNING) << " <<<";
|
||||
MS_EXCEPTION(TypeError)
|
||||
<< "The operator [" << AnfAlgo::GetCNodeName(kernel_node)
|
||||
<< "] cannot find valid kernel info(input and output data type), not supported the data type: " << buffer.str()
|
||||
<< ", please refer to the supported data types in candidates kernel info list."
|
||||
<< " trace: " << trace::DumpSourceLines(kernel_node) << ", Node DebugString: " << kernel_node->DebugString();
|
||||
MS_LOG(EXCEPTION) << "Can not find any available operator info for operator ["
|
||||
<< kernel_node->fullname_with_scope()
|
||||
<< "]. Maybe don't supported the data type: " << buffer.str()
|
||||
<< ", or maybe the operator can not supported on current platform.\n Node trace: "
|
||||
<< trace::DumpSourceLines(kernel_node);
|
||||
}
|
||||
}
|
||||
return select_status;
|
||||
|
|
Loading…
Reference in New Issue