!31139 Process the UpdateState node between OptimizeAssign and EliminateRedundantOutput
Merge pull request !31139 from DeshiChen/0310_optassign
commit f8a885f04a
@@ -162,12 +162,13 @@ PassManagerPtr GraphKernelOptimizer::HighLevelOpt2() const {
   // Auto recompute according to local memory burst.
   auto recompute_lv = GetPassLevelByFlag(flags.recompute_increment_threshold > 0 || flags.recompute_peak_threshold > 0);
   pm->AddPass(std::make_shared<GraphKernelRecompute>(), recompute_lv);
-  pm->AddPass(std::make_shared<ExtendOutputForUpdateState>(), recompute_lv);
-  pm->AddPass(std::make_shared<MergeOutputForUpdateState>(), recompute_lv);
 
   // Replace Assign with InplaceAssign, and replace original output with overridden parameters
   pm->AddPass(std::make_shared<OptimizeAssign>(), OptLevel_2);
-  pm->AddPass(std::make_shared<EliminateRedundantOutput>(), OptLevel_2);
+
+  pm->AddPass(std::make_shared<ExtendOutputForUpdateState>(), std::min(recompute_lv, OptLevel_2));
+  pm->AddPass(std::make_shared<MergeOutputForUpdateState>(), std::min(recompute_lv, OptLevel_2));
+  pm->AddPass(std::make_shared<EliminateRedundantOutput>(), std::min(recompute_lv, OptLevel_2));
 
   // Enable atomic add
   pm->AddPass(std::make_shared<AtomicCleanInsertter>(), OptLevel_2, is_gpu || is_ascend);
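The point of this hunk is the pass level: ExtendOutputForUpdateState and MergeOutputForUpdateState now run at std::min(recompute_lv, OptLevel_2) between OptimizeAssign and EliminateRedundantOutput, so the UpdateState bookkeeping happens whenever either GraphKernelRecompute or OptimizeAssign is enabled, and EliminateRedundantOutput then sees the merged outputs. Below is a minimal standalone sketch of that gating logic; it assumes GetPassLevelByFlag maps an enabled flag to the default pass level and a disabled one to OptLevel_MAX, and that a pass registered at level L runs only when the configured optimization level reaches L. The OptLevel values and PassEnabled helper are illustrative stand-ins, not the MindSpore pass-manager API.

#include <algorithm>
#include <iostream>

// Illustrative stand-ins; the real definitions live in MindSpore's graph
// kernel pass manager.
enum OptLevel : unsigned int { OptLevel_0 = 0, OptLevel_1 = 1, OptLevel_2 = 2, OptLevel_MAX = 4 };

// Assumption: an enabled flag maps to the default pass level, a disabled
// flag to OptLevel_MAX (i.e. "never runs").
unsigned int GetPassLevelByFlag(bool flag) { return flag ? OptLevel_1 : OptLevel_MAX; }

// Assumption: a pass registered at pass_level runs iff the configured
// optimization level is at least that high.
bool PassEnabled(unsigned int configured, unsigned int pass_level) { return configured >= pass_level; }

int main() {
  unsigned int configured = OptLevel_2;  // typical graph-kernel setting
  bool recompute_flag = false;           // recompute thresholds not set

  unsigned int recompute_lv = GetPassLevelByFlag(recompute_flag);
  // Old behavior: the UpdateState passes shared recompute_lv, so they were
  // skipped here even though OptimizeAssign (an OptLevel_2 pass) still
  // needed them. New behavior: std::min keeps them enabled for either
  // producer pass.
  unsigned int update_state_lv = std::min(recompute_lv, static_cast<unsigned int>(OptLevel_2));

  std::cout << "recompute pass runs: " << PassEnabled(configured, recompute_lv) << "\n";        // 0
  std::cout << "UpdateState passes run: " << PassEnabled(configured, update_state_lv) << "\n";  // 1
}

Taking std::min of two enable-levels acts as a logical OR: the shared cleanup passes stay enabled if at least one of the passes that needs them is enabled.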
@@ -1057,16 +1057,19 @@ bool AkgKernelJsonGenerator::CollectFusedJsonWithSingleKernel(const CNodePtr &c_
 
 void ComputeCapability::GetComputeCapability() {
 #ifdef ENABLE_GPU
+  if (Callback::Instance()->GetTargetFromContext() != kGPUDevice) {
+    this->compute_capability_ = "Unknown";
+  }
   int a, b;
   auto ret = cuDeviceGetAttribute(&a, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, 0);
-  if (ret != CUDA_SUCCESS && Callback::Instance()->GetTargetFromContext() == kGPUDevice) {
+  if (ret != CUDA_SUCCESS) {
     const char *msg = nullptr;
     cuGetErrorName(ret, &msg);
     MS_LOG(WARNING) << "Get CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR fail, error message: " << msg;
     return;
   }
   ret = cuDeviceGetAttribute(&b, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, 0);
-  if (ret != CUDA_SUCCESS && Callback::Instance()->GetTargetFromContext() == kGPUDevice) {
+  if (ret != CUDA_SUCCESS) {
     const char *msg = nullptr;
     cuGetErrorName(ret, &msg);
     MS_LOG(WARNING) << "Get CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR fail, error message: " << msg;
@@ -1076,6 +1079,5 @@ void ComputeCapability::GetComputeCapability() {
 #else
   this->compute_capability_ = "Unknown";
 #endif
-  return;
 }
 } // namespace mindspore::graphkernel
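For readers outside the MindSpore tree, here is a self-contained sketch of the same compute-capability probe using the CUDA driver API. The cuDeviceGetAttribute and cuGetErrorName calls mirror the patched function; the cuInit call, the use of device 0, and the final "major.minor" string assembly are assumptions added to make it runnable on its own (the hunk above does not show how compute_capability_ is ultimately formatted).

// Standalone sketch (not MindSpore code). Build with:
//   g++ cc_probe.cpp -o cc_probe -lcuda
#include <cuda.h>
#include <iostream>
#include <string>

std::string GetComputeCapability() {
  // Unlike the patched member function, a standalone process must call
  // cuInit() before any other driver-API call, or every call returns
  // CUDA_ERROR_NOT_INITIALIZED; MindSpore's runtime has initialized the
  // driver long before this point.
  const char *msg = nullptr;
  CUresult ret = cuInit(0);
  if (ret != CUDA_SUCCESS) {
    cuGetErrorName(ret, &msg);
    std::cerr << "cuInit fail, error message: " << msg << "\n";
    return "Unknown";
  }
  int major = 0;
  int minor = 0;
  // Device 0, as in the patch; a multi-GPU setup could iterate over devices.
  ret = cuDeviceGetAttribute(&major, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, 0);
  if (ret != CUDA_SUCCESS) {
    cuGetErrorName(ret, &msg);
    std::cerr << "Get CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR fail, error message: " << msg << "\n";
    return "Unknown";
  }
  ret = cuDeviceGetAttribute(&minor, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, 0);
  if (ret != CUDA_SUCCESS) {
    cuGetErrorName(ret, &msg);
    std::cerr << "Get CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR fail, error message: " << msg << "\n";
    return "Unknown";
  }
  return std::to_string(major) + "." + std::to_string(minor);  // e.g. "8.6"
}

int main() { std::cout << GetComputeCapability() << "\n"; }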