diff --git a/llvm/lib/CodeGen/MachineLICM.cpp b/llvm/lib/CodeGen/MachineLICM.cpp index 8e20541df5ae..19e36669a254 100644 --- a/llvm/lib/CodeGen/MachineLICM.cpp +++ b/llvm/lib/CodeGen/MachineLICM.cpp @@ -1044,6 +1044,13 @@ bool MachineLICMBase::IsLICMCandidate(MachineInstr &I) { !IsGuaranteedToExecute(I.getParent())) return false; + // The convergent attribute is used on operations that involve inter-thread + // communication whose results are implicitly affected by the enclosing + // control flow. It is not safe to hoist or sink such operations across + // control flow. + if (I.isConvergent()) + return false; + return true; } diff --git a/llvm/lib/Transforms/Scalar/LICM.cpp b/llvm/lib/Transforms/Scalar/LICM.cpp index 3c9aa946d607..6b8973d3cad7 100644 --- a/llvm/lib/Transforms/Scalar/LICM.cpp +++ b/llvm/lib/Transforms/Scalar/LICM.cpp @@ -1163,6 +1163,13 @@ bool llvm::canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT, if (CI->mayThrow()) return false; + // The convergent attribute is used on operations that involve + // inter-thread communication whose results are implicitly affected by the + // enclosing control flow. It is not safe to hoist or sink such operations + // across control flow. + if (CI->isConvergent()) + return false; + using namespace PatternMatch; if (match(CI, m_Intrinsic())) // Assumes don't actually alias anything or throw diff --git a/llvm/test/CodeGen/AMDGPU/machinelicm-convergent.mir b/llvm/test/CodeGen/AMDGPU/machinelicm-convergent.mir new file mode 100644 index 000000000000..5ac2713f247a --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/machinelicm-convergent.mir @@ -0,0 +1,32 @@ +# RUN: llc -march=amdgcn -run-pass=early-machinelicm -o - %s | FileCheck %s + +# Check that MachineLICM does not hoist convergent instructions out of a loop +# (DS_PERMUTE_B32 in this example). 
+ +--- +# CHECK-LABEL: name: _amdgpu_cs_main +# CHECK: bb.1: +# CHECK: DS_PERMUTE_B32 + +name: _amdgpu_cs_main +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + successors: %bb.1 + + %0:vgpr_32 = COPY $vgpr0 + %1:vgpr_32 = COPY $vgpr1 + + bb.1: + successors: %bb.1, %bb.2 + + %2:vgpr_32 = DS_PERMUTE_B32 %0, %1, 0, implicit $exec + %3:vgpr_32 = V_ADD_CO_U32_e32 %0, %2, implicit-def $vcc, implicit $exec + S_CBRANCH_SCC1 %bb.1, implicit undef $scc + S_BRANCH %bb.2 + + bb.2: + $vgpr0 = COPY %3 + S_ENDPGM 0 + +... diff --git a/llvm/test/Transforms/LICM/convergent.ll b/llvm/test/Transforms/LICM/convergent.ll new file mode 100644 index 000000000000..9ff95bf1d320 --- /dev/null +++ b/llvm/test/Transforms/LICM/convergent.ll @@ -0,0 +1,21 @@ +; RUN: opt < %s -S -licm | FileCheck %s + +; Check that LICM does not hoist calls to convergent functions out of a loop. +; CHECK: define i32 @test +; CHECK: loop: +; CHECK: call i32 @f + +define i32 @test(i32* nocapture noalias %x, i32* nocapture %y) { +entry: + br label %loop + +loop: + %a = call i32 @f() nounwind readnone convergent + %exitcond = icmp ne i32 %a, 0 + br i1 %exitcond, label %end, label %loop + +end: + ret i32 %a +} + +declare i32 @f() nounwind readnone convergent