!31139 Process the UpdateState node between OptimizeAssign and EliminateRedundantOutput

Merge pull request !31139 from DeshiChen/0310_optassign
This commit is contained in:
i-robot 2022-03-11 07:01:01 +00:00 committed by Gitee
commit f8a885f04a
No known key found for this signature in database
GPG Key ID: 173E9B9CA92EEF8F
2 changed files with 9 additions and 6 deletions

View File

@ -162,12 +162,13 @@ PassManagerPtr GraphKernelOptimizer::HighLevelOpt2() const {
// Auto recompute according to local memory burst.
auto recompute_lv = GetPassLevelByFlag(flags.recompute_increment_threshold > 0 || flags.recompute_peak_threshold > 0);
pm->AddPass(std::make_shared<GraphKernelRecompute>(), recompute_lv);
pm->AddPass(std::make_shared<ExtendOutputForUpdateState>(), recompute_lv);
pm->AddPass(std::make_shared<MergeOutputForUpdateState>(), recompute_lv);
// Replace Assign with InplaceAssign, and replace original output with overridden parameters
pm->AddPass(std::make_shared<OptimizeAssign>(), OptLevel_2);
pm->AddPass(std::make_shared<EliminateRedundantOutput>(), OptLevel_2);
pm->AddPass(std::make_shared<ExtendOutputForUpdateState>(), std::min(recompute_lv, OptLevel_2));
pm->AddPass(std::make_shared<MergeOutputForUpdateState>(), std::min(recompute_lv, OptLevel_2));
pm->AddPass(std::make_shared<EliminateRedundantOutput>(), std::min(recompute_lv, OptLevel_2));
// Enable atomic add
pm->AddPass(std::make_shared<AtomicCleanInsertter>(), OptLevel_2, is_gpu || is_ascend);

View File

@ -1057,16 +1057,19 @@ bool AkgKernelJsonGenerator::CollectFusedJsonWithSingleKernel(const CNodePtr &c_
void ComputeCapability::GetComputeCapability() {
#ifdef ENABLE_GPU
if (Callback::Instance()->GetTargetFromContext() != kGPUDevice) {
this->compute_capability_ = "Unknown";
}
int a, b;
auto ret = cuDeviceGetAttribute(&a, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, 0);
if (ret != CUDA_SUCCESS && Callback::Instance()->GetTargetFromContext() == kGPUDevice) {
if (ret != CUDA_SUCCESS) {
const char *msg = nullptr;
cuGetErrorName(ret, &msg);
MS_LOG(WARNING) << "Get CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR fail, error message: " << msg;
return;
}
ret = cuDeviceGetAttribute(&b, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, 0);
if (ret != CUDA_SUCCESS && Callback::Instance()->GetTargetFromContext() == kGPUDevice) {
if (ret != CUDA_SUCCESS) {
const char *msg = nullptr;
cuGetErrorName(ret, &msg);
MS_LOG(WARNING) << "Get CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR fail, error message: " << msg;
@ -1076,6 +1079,5 @@ void ComputeCapability::GetComputeCapability() {
#else
this->compute_capability_ = "Unknown";
#endif
return;
}
} // namespace mindspore::graphkernel