forked from OSchip/llvm-project
Prevent LICM and machineLICM from hoisting convergent operations
The results of convergent operations are implicitly affected by the enclosing control flow, so they should not be hoisted out of arbitrary loops. Patch by Xiaoqing Wu <xiaoqing_wu@apple.com>. Differential Revision: https://reviews.llvm.org/D90361
This commit is contained in:
parent
137ff73317
commit
a585228027
|
@ -1044,6 +1044,13 @@ bool MachineLICMBase::IsLICMCandidate(MachineInstr &I) {
|
|||
!IsGuaranteedToExecute(I.getParent()))
|
||||
return false;
|
||||
|
||||
// The convergent attribute is used on operations that involve inter-thread
|
||||
// communication whose results are implicitly affected by the enclosing
|
||||
// control flow. It is not safe to hoist or sink such operations across
|
||||
// control flow.
|
||||
if (I.isConvergent())
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
|
|
@ -1163,6 +1163,13 @@ bool llvm::canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT,
|
|||
if (CI->mayThrow())
|
||||
return false;
|
||||
|
||||
// The convergent attribute is used on operations that involve
|
||||
// inter-thread communication whose results are implicitly affected by the
|
||||
// enclosing control flow. It is not safe to hoist or sink such operations
|
||||
// across control flow.
|
||||
if (CI->isConvergent())
|
||||
return false;
|
||||
|
||||
using namespace PatternMatch;
|
||||
if (match(CI, m_Intrinsic<Intrinsic::assume>()))
|
||||
// Assumes don't actually alias anything or throw
|
||||
|
|
|
@ -0,0 +1,32 @@
|
|||
# RUN: llc -march=amdgcn -run-pass=early-machinelicm -o - %s | FileCheck %s
|
||||
|
||||
# Test to check that machine LICM does not hoist convergent instructions,
|
||||
# DS_PERMUTE_B32 in this example.
|
||||
|
||||
---
|
||||
# CHECK-LABEL: name: _amdgpu_cs_main
|
||||
# CHECK: bb.1:
|
||||
# CHECK: DS_PERMUTE_B32
|
||||
|
||||
name: _amdgpu_cs_main
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0, $vgpr1
|
||||
successors: %bb.1
|
||||
|
||||
%0:vgpr_32 = COPY $vgpr0
|
||||
%1:vgpr_32 = COPY $vgpr1
|
||||
|
||||
bb.1:
|
||||
successors: %bb.1, %bb.2
|
||||
|
||||
%2:vgpr_32 = DS_PERMUTE_B32 %0, %1, 0, implicit $exec
|
||||
%3:vgpr_32 = V_ADD_CO_U32_e32 %0, %2, implicit-def $vcc, implicit $exec
|
||||
S_CBRANCH_SCC1 %bb.1, implicit undef $scc
|
||||
S_BRANCH %bb.2
|
||||
|
||||
bb.2:
|
||||
$vgpr0 = COPY %3
|
||||
S_ENDPGM 0
|
||||
|
||||
...
|
|
@ -0,0 +1,21 @@
|
|||
; RUN: opt < %s -S -licm | FileCheck %s
|
||||
|
||||
; Check that we do not hoist calls to convergent functions out of the loop
|
||||
; CHECK: define i32 @test
|
||||
; CHECK: loop:
|
||||
; CHECK: call i32 @f
|
||||
|
||||
define i32 @test(i32* nocapture noalias %x, i32* nocapture %y) {
|
||||
entry:
|
||||
br label %loop
|
||||
|
||||
loop:
|
||||
%a = call i32 @f() nounwind readnone convergent
|
||||
%exitcond = icmp ne i32 %a, 0
|
||||
br i1 %exitcond, label %end, label %loop
|
||||
|
||||
end:
|
||||
ret i32 %a
|
||||
}
|
||||
|
||||
declare i32 @f() nounwind readnone convergent
|
Loading…
Reference in New Issue