Merge pull request !23696 from liubuyu/bug_fix
This commit is contained in:
i-robot 2021-09-21 08:50:49 +00:00 committed by Gitee
commit 9cba76a933
5 changed files with 46 additions and 53 deletions

View File

@ -250,12 +250,15 @@ std::vector<std::string> GetTuneOpsList(const std::string &d) {
size_t p1 = 0;
size_t p2 = ops.find(d);
while (p2 != std::string::npos) {
res.emplace_back(ops.substr(p1, p2 - p1));
if (p1 < ops.length() && (p2 - p1) < ops.length()) {
(void)res.emplace_back(ops.substr(p1, p2 - p1));
}
p1 = p2 + 1;
p2 = ops.find(d, p1);
}
if (p1 != ops.length()) {
res.emplace_back(ops.substr(p1));
if (p1 <= ops.length()) {
(void)res.emplace_back(ops.substr(p1));
}
return res;
}
@ -274,7 +277,7 @@ void AscendKernelCompileManager::PrintProcessLog(const nlohmann::json &json, int
auto job_id = GetJsonValue<int>(json, kJobId);
auto json_name = GetJsonValue<std::string>(json, kFusionOpName);
std::vector<nlohmann::json> all_logs;
std::copy(logs.begin(), logs.end(), std::back_inserter(all_logs));
(void)std::copy(logs.begin(), logs.end(), std::back_inserter(all_logs));
std::sort(all_logs.begin(), all_logs.end(), Order);
for (const auto &item : all_logs) {
PrintInfo(item, json_name, job_id, adjust_log_level);
@ -327,7 +330,7 @@ void AscendKernelCompileManager::QueryResultProcess(const nlohmann::json &json,
}
}
nlohmann::json AscendKernelCompileManager::TurnStrToJson(const std::string &string) {
nlohmann::json AscendKernelCompileManager::TurnStrToJson(const std::string &string) const {
nlohmann::json json;
if (!ParseJson(string, &json)) {
MS_LOG(EXCEPTION) << "Parse build result error.";
@ -358,12 +361,12 @@ void AscendKernelCompileManager::ParseTargetJobStatus(const std::string &type, c
if (type == kPreCompile) {
build_manager_->PreTaskFinishProcess(task_info.target_job_id, build_result);
} else {
build_manager_->TaskFinishProcess(task_info.target_job_id, build_result);
(void)build_manager_->TaskFinishProcess(task_info.target_job_id, build_result);
}
success_job->emplace_back(task_info.target_job_id);
(void)success_job->emplace_back(task_info.target_job_id);
} else if (task_info.job_status == kFailed) {
if (type == kPreCompile) {
success_job->emplace_back(task_info.target_job_id);
(void)success_job->emplace_back(task_info.target_job_id);
MS_LOG(WARNING) << "Single op pre build failed ,op: " << kernel_name
<< "\n except_msg : " << task_info.except_msg;
} else {
@ -396,10 +399,10 @@ void AscendKernelCompileManager::QueryFinishJob(const std::string &job_type) {
auto job_result = build_manager_->ProcessTbeJob(query_json);
query_cnt++;
ParseTargetJobStatus(job_type, job_result, &success_job);
iter++;
(void)iter++;
}
for (auto k : success_job) {
job_list_.erase(k);
(void)job_list_.erase(k);
}
success_job.clear();
if (!job_list_.empty()) {
@ -436,21 +439,21 @@ void AscendKernelCompileManager::QueryFusionFinishJob(KernelModMap *kernel_mode_
if (kernel_mode_item.second != nullptr) {
(void)kernel_mode_ret->emplace(kernel_mode_item);
}
success_job.emplace_back(task_info.target_job_id);
(void)success_job.emplace_back(task_info.target_job_id);
} else if (task_info.job_status == kFailed) {
MS_LOG(INFO) << "FusionOp compile failed, json name: " << task_info.json_name
<< "\n Except_msg: " << task_info.except_msg;
auto target_id = task_info.target_job_id;
success_job.emplace_back(target_id);
(void)success_job.emplace_back(target_id);
build_failed_nums += 1;
}
} else {
MS_LOG(EXCEPTION) << "Fusion op query failed. message: " << build_result;
}
iter++;
(void)iter++;
}
for (auto k : success_job) {
job_list_.erase(k);
(void)job_list_.erase(k);
}
success_job.clear();
if (!job_list_.empty()) {
@ -463,14 +466,14 @@ void AscendKernelCompileManager::QueryFusionFinishJob(KernelModMap *kernel_mode_
MS_LOG(INFO) << "Compile Fusion Kernel Failed Num: " << build_failed_nums;
}
bool AscendKernelCompileManager::JsonAssemble(const std::string &job_type, const nlohmann::json &src_json,
void AscendKernelCompileManager::JsonAssemble(const std::string &job_type, const nlohmann::json &src_json,
nlohmann::json *dst_json) {
MS_EXCEPTION_IF_NULL(src_json);
MS_EXCEPTION_IF_NULL(dst_json);
static size_t job_id = 0;
static auto context_ptr = MsContext::GetInstance();
auto context_ptr = MsContext::GetInstance();
MS_EXCEPTION_IF_NULL(context_ptr);
static int source_id = context_ptr->get_param<uint32_t>(MS_CTX_DEVICE_ID);
static uint32_t source_id = context_ptr->get_param<uint32_t>(MS_CTX_DEVICE_ID);
(*dst_json)[kJobType] = job_type;
(*dst_json)[kJobId] = job_id++;
(*dst_json)[kSourceId] = source_id;
@ -500,7 +503,6 @@ bool AscendKernelCompileManager::JsonAssemble(const std::string &job_type, const
} else {
(*dst_json)[kJobContent] = src_json;
}
return true;
}
void AscendKernelCompileManager::GetAllAscendNodes(const std::shared_ptr<session::KernelGraph> &kernel_graph,
@ -515,7 +517,7 @@ void AscendKernelCompileManager::GetAllAscendNodes(const std::shared_ptr<session
KernelType kernel_type = AnfAlgo::GetKernelType(anf_node);
if (kernel_type == TBE_KERNEL) {
if (AnfAlgo::GetKernelMod(anf_node) == nullptr) {
tbe_nodes->push_back(anf_node);
(void)tbe_nodes->push_back(anf_node);
}
}
}
@ -544,10 +546,7 @@ void AscendKernelCompileManager::AscendPreBuild(const std::shared_ptr<session::K
}
auto json_name = json_creator->GetJsonName();
nlohmann::json build_json;
if (!JsonAssemble(kPreCompile, kernel_json, &build_json)) {
MS_LOG(EXCEPTION) << "Assemble json failed, job type: " << kPreCompile
<< ", node trace: " << trace::DumpSourceLines(node);
}
JsonAssemble(kPreCompile, kernel_json, &build_json);
auto build_result = build_manager_->ProcessTbeJob(build_json);
auto json_obj = TurnStrToJson(build_result);
PrintCompileResult(json_obj);
@ -556,8 +555,8 @@ void AscendKernelCompileManager::AscendPreBuild(const std::shared_ptr<session::K
if (json_obj.at(kStatus) == kRunning) {
std::pair<int, nlohmann::json> pair(task_id, build_json);
std::pair<int, AnfNodePtr> id_node(task_id, node);
job_list_.insert(pair);
job_id_to_node_.insert(id_node);
(void)job_list_.insert(pair);
(void)job_id_to_node_.insert(id_node);
} else if (json_obj.at(kStatus) == kSuccess) {
std::string build_res = GetJsonValue<std::string>(json_obj, kResult);
build_manager_->PreTaskFinishProcess(task_id, build_res);
@ -608,10 +607,7 @@ bool AscendKernelCompileManager::AscendSingleOpCompile(const std::vector<AnfNode
nlohmann::json build_json;
job_type = is_tune_flag_ ? kTune : kCompile;
if (!JsonAssemble(job_type, kernel_json, &build_json)) {
MS_LOG(EXCEPTION) << "Assemble json failed, job type: " << kCompile << ", op:[" << op_name << ", "
<< node->fullname_with_scope() << "], node trace: " << trace::DumpSourceLines(node);
}
JsonAssemble(job_type, kernel_json, &build_json);
auto build_str = build_json.dump(indent);
MS_LOG(DEBUG) << "Op build json file : " << build_str;
TbeUtils::SaveJsonInfo(json_name, build_str);
@ -634,7 +630,7 @@ bool AscendKernelCompileManager::AscendSingleOpCompile(const std::vector<AnfNode
// job running success, save build result.
MS_LOG(DEBUG) << "Target job compile success, save build result, json name: " << json_name;
std::string build_res = GetJsonValue<std::string>(json_obj, kResult);
build_manager_->TaskFinishProcess(task_id, build_res);
(void)build_manager_->TaskFinishProcess(task_id, build_res);
} else {
// job running failed, raise exception (only single op)
ResetOldTask();
@ -690,9 +686,7 @@ KernelModMap AscendKernelCompileManager::AscendFusionOpCompile(const std::vector
nlohmann::json build_json;
const std::string job_type = is_tune_flag_ ? kTune : kFusionCompile;
if (!JsonAssemble(job_type, fusion_op, &build_json)) {
MS_LOG(EXCEPTION) << "Assemble json failed, job type: [" << kFusionCompile << "], json name: " << json_name;
}
JsonAssemble(job_type, fusion_op, &build_json);
auto build_str = build_json.dump(indent);
MS_LOG(DEBUG) << "FusionOp build json file : " << build_str;
TbeUtils::SaveJsonInfo(json_name, build_str);
@ -729,9 +723,8 @@ void AscendKernelCompileManager::PrintInitResult(const nlohmann::json &json) {
MS_LOG(DEBUG) << "Job: " << job_type << " result processing.";
// init only concern about result, but don't care about the process.
if (json.at(kStatus) == kFailed) {
auto all_logs = GetJsonValue<std::vector<nlohmann::json>>(json, kProcessInfo);
auto message = FilterExceptionMessage(all_logs);
MS_LOG(EXCEPTION) << "Job " << job_type << " running failed, except_msg: " << message;
PrintProcessLog(json);
MS_LOG(EXCEPTION) << "Job " << job_type << " running failed.";
}
MS_LOG(INFO) << "Job: " << job_type << " running success.";
}
@ -746,9 +739,7 @@ void AscendKernelCompileManager::TbeInitialize() {
MS_EXCEPTION_IF_NULL(build_manager_);
nlohmann::json init_json;
nlohmann::json soc_info = TbeUtils::GenSocInfo();
if (!JsonAssemble(kInitialize, soc_info, &init_json)) {
MS_LOG(EXCEPTION) << "Assemble json failed, job type: Initialize.";
}
JsonAssemble(kInitialize, soc_info, &init_json);
auto offline_tune = (init_json[kJobContent][kSocInfo][kOfflineTune]).get<bool>();
op_debug_level_ = (init_json[kJobContent][kSocInfo]["op_debug_level"]).get<std::string>();
auto auto_tiling_mode = (init_json[kJobContent][kSocInfo]["autoTilingMode"]).get<std::string>();
@ -799,9 +790,7 @@ std::string AscendKernelCompileManager::AscendOpSelectFormat(const AnfNodePtr &n
if (!json_creator->GenJson(node, &kernel_info)) {
MS_LOG(EXCEPTION) << "Gen select json failed. [" << op_name << ", " << node->fullname_with_scope() << "]";
}
if (!JsonAssemble(kSelectFormat, kernel_info, &select_json)) {
MS_LOG(EXCEPTION) << "Assemble json failed, job type: SelectFormat";
}
JsonAssemble(kSelectFormat, kernel_info, &select_json);
auto select_ret = build_manager_->ProcessTbeJob(select_json);
auto json_ret = TurnStrToJson(select_ret);
return OpSelectAndCheckResultProcess(json_ret, node);
@ -820,9 +809,7 @@ bool AscendKernelCompileManager::AscendOpCheckSupported(const AnfNodePtr &node)
MS_LOG(EXCEPTION) << "Gen check supported json failed.[" << op_name << ", " << node->fullname_with_scope()
<< "], node trace: " << trace::DumpSourceLines(node);
}
if (!JsonAssemble(kCheckSupport, kernel_info, &check_json)) {
MS_LOG(EXCEPTION) << "Assemble json failed, job type: CheckSupport. Node trace: " << trace::DumpSourceLines(node);
}
JsonAssemble(kCheckSupport, kernel_info, &check_json);
auto check_ret = build_manager_->ProcessTbeJob(check_json);
auto json_ret = TurnStrToJson(check_ret);
std::string check_info = OpSelectAndCheckResultProcess(json_ret, node);

View File

@ -72,12 +72,12 @@ class AscendKernelCompileManager {
void QueryPreBuildFinishJob();
void QueryFusionFinishJob(KernelModMap *kernel_mode_ret);
void PrintProcessLog(const nlohmann::json &json, int adjust_log_level);
bool JsonAssemble(const std::string &job_type, const nlohmann::json &src_json, nlohmann::json *dst_json);
void JsonAssemble(const std::string &job_type, const nlohmann::json &src_json, nlohmann::json *dst_json);
void PrintInitResult(const nlohmann::json &json);
void PrintCompileResult(const nlohmann::json &json);
std::string OpSelectAndCheckResultProcess(const nlohmann::json &json, const AnfNodePtr &node);
void QueryResultProcess(const nlohmann::json &json, TargetJobStatus *task_info);
nlohmann::json TurnStrToJson(const std::string &str);
nlohmann::json TurnStrToJson(const std::string &str) const;
static bool tbe_init_flag_;
static bool is_tune_flag_;

View File

@ -64,7 +64,7 @@ class ParallelBuildManager {
// Interacts with the real backend, which could be implemented in Python.
static int StartCompileOp(const nlohmann::json &kernel_json);
static std::string ProcessTbeJob(const nlohmann::json &kernel_json);
std::string ProcessTbeJob(const nlohmann::json &kernel_json);
static bool WaitOne(int *task_id, std::string *task_result, std::string *build_result);
void ResetTaskInfo() noexcept;
AnfNodePtr GetAnfNodeByTaskID(int32_t task_id);

View File

@ -2237,11 +2237,11 @@ void AnfRuntimeAlgorithm::InferShape(const CNodePtr &node, std::map<uint32_t, te
MS_EXCEPTION_IF_NULL(abs);
auto tuple_get_item_indexk = AnfAlgo::GetTupleGetItemOutIndex(cnode_input->cast<CNodePtr>());
auto abs_i = abs->elements()[tuple_get_item_indexk];
args_spec_list.emplace_back(abs_i);
(void)args_spec_list.emplace_back(abs_i);
} else if (cnode_input->isa<CNode>() && AnfAlgo::GetCNodeName(cnode_input) == prim::kPrimReshape->name()) {
args_spec_list.emplace_back(cnode_input->abstract());
(void)args_spec_list.emplace_back(cnode_input->abstract());
} else {
args_spec_list.emplace_back(real_input->abstract());
(void)args_spec_list.emplace_back(real_input->abstract());
}
}
auto eval_result = opt::CppInferShape(primitive, args_spec_list);

View File

@ -542,14 +542,14 @@ std::vector<int64_t> NcdhwDeviceDynamicShape(const std::vector<int64_t> &shape)
// eg. [2,3,4] => [2,4,3]; [2,3,4,5] => [2,4,5,3]
// Moves the channel axis (index 1) of `shape` to the last position while keeping
// every other axis in its original relative order: [N, C, d0, d1, ...] => [N, d0, d1, ..., C].
//
// @param shape  Host shape expressed as unsigned extents.
// @return       A new vector holding the permuted extents. Shapes with fewer than
//               two axes are returned unchanged.
std::vector<size_t> ChannelLastDeviceShape(const std::vector<size_t> &shape) {
  const size_t dim = shape.size();
  // There is no channel axis to relocate below rank 2. The index-permutation form
  // would evaluate `axis[dim - 1]` with dim == 0 (unsigned underflow) or read
  // shape[1] with dim == 1 (out of bounds), so handle these ranks explicitly.
  // NOTE(review): returning the input unchanged is assumed to be acceptable for
  // callers; confirm whether such shapes should instead be rejected.
  constexpr size_t kMinRankWithChannel = 2;
  if (dim < kMinRankWithChannel) {
    return shape;
  }

  std::vector<size_t> device_shape;
  device_shape.reserve(dim);
  // Leading (batch) axis stays in place.
  device_shape.push_back(shape[0]);
  // Axes 2..dim-1 shift one position to the left.
  device_shape.insert(device_shape.end(), shape.begin() + kMinRankWithChannel, shape.end());
  // Channel axis goes last.
  device_shape.push_back(shape[1]);
  return device_shape;
}
@ -600,6 +600,9 @@ std::vector<int64_t> FracZDeviceShapeWithGroups(const std::vector<int64_t> &shap
int64_t c1_dim = Shape::SHP_ANY;
int64_t g_dim = Shape::SHP_ANY;
int64_t n1 = Shape::SHP_ANY;
if (groups <= 0) {
MS_LOG(EXCEPTION) << "The value of groups should be greater than 0, but got " << groups;
}
if (!HasShapeDynamic({shape[kC], shape[kN]})) {
size_t group_size = LongToSize(groups);
size_t cin_ori_tmp = LongToSize(shape[kC]);
@ -1967,6 +1970,9 @@ bool NchwFracZTransWithGroups(const FormatArgs &args, void *result, bool to_devi
return false;
}
size_t e_mult = std::min(Lcm(Lcm(cin_ori, kCubeSize) / cin_ori, Lcm(cout_ori, kCubeSize) / cout_ori), group_size);
if (e_mult == 0) {
MS_LOG(EXCEPTION) << "The value of e_mult should be greater than 0, but got " << e_mult;
}
size_t cin_opt = DivCeil(e_mult * cin_ori, kCubeSize) * kCubeSize;
size_t cout_opt = DivCeil(e_mult * cout_ori, kCubeSize) * kCubeSize;
size_t c1_dim = cin_opt / kCubeSize;