forked from OSchip/llvm-project
[AMDGPU] Prevent spills before exec mask is restored
Inline spiller can decide to move a spill as early as possible in the basic block. It will skip phis and label, but we also need to make sure it skips instructions in the basic block prologue which restore exec mask. Added isPositionLike callback in TargetInstrInfo to detect instructions which shall be skipped in addition to common phis, labels etc. Differential Revision: https://reviews.llvm.org/D27997 llvm-svn: 292554
This commit is contained in:
parent
e094cc4372
commit
6ec3e3a728
|
@ -1510,6 +1510,13 @@ public:
|
|||
return false;
|
||||
}
|
||||
|
||||
/// True if the instruction is bound to the top of its basic block and no
|
||||
/// other instructions shall be inserted before it. This can be implemented
|
||||
/// to prevent register allocator to insert spills before such instructions.
|
||||
virtual bool isBasicBlockPrologue(const MachineInstr &MI) const {
|
||||
return false;
|
||||
}
|
||||
|
||||
private:
|
||||
unsigned CallFrameSetupOpcode, CallFrameDestroyOpcode;
|
||||
unsigned CatchRetOpcode;
|
||||
|
|
|
@ -148,8 +148,11 @@ MachineBasicBlock::iterator MachineBasicBlock::getFirstNonPHI() {
|
|||
|
||||
MachineBasicBlock::iterator
|
||||
MachineBasicBlock::SkipPHIsAndLabels(MachineBasicBlock::iterator I) {
|
||||
const TargetInstrInfo *TII = getParent()->getSubtarget().getInstrInfo();
|
||||
|
||||
iterator E = end();
|
||||
while (I != E && (I->isPHI() || I->isPosition()))
|
||||
while (I != E && (I->isPHI() || I->isPosition() ||
|
||||
TII->isBasicBlockPrologue(*I)))
|
||||
++I;
|
||||
// FIXME: This needs to change if we wish to bundle labels
|
||||
// inside the bundle.
|
||||
|
@ -160,8 +163,11 @@ MachineBasicBlock::SkipPHIsAndLabels(MachineBasicBlock::iterator I) {
|
|||
|
||||
MachineBasicBlock::iterator
|
||||
MachineBasicBlock::SkipPHIsLabelsAndDebug(MachineBasicBlock::iterator I) {
|
||||
const TargetInstrInfo *TII = getParent()->getSubtarget().getInstrInfo();
|
||||
|
||||
iterator E = end();
|
||||
while (I != E && (I->isPHI() || I->isPosition() || I->isDebugValue()))
|
||||
while (I != E && (I->isPHI() || I->isPosition() || I->isDebugValue() ||
|
||||
TII->isBasicBlockPrologue(*I)))
|
||||
++I;
|
||||
// FIXME: This needs to change if we wish to bundle labels / dbg_values
|
||||
// inside the bundle.
|
||||
|
|
|
@ -3635,3 +3635,8 @@ ScheduleHazardRecognizer *
|
|||
SIInstrInfo::CreateTargetPostRAHazardRecognizer(const MachineFunction &MF) const {
|
||||
return new GCNHazardRecognizer(MF);
|
||||
}
|
||||
|
||||
bool SIInstrInfo::isBasicBlockPrologue(const MachineInstr &MI) const {
|
||||
return !MI.isTerminator() && MI.getOpcode() != AMDGPU::COPY &&
|
||||
MI.modifiesRegister(AMDGPU::EXEC, &RI);
|
||||
}
|
||||
|
|
|
@ -731,6 +731,8 @@ public:
|
|||
|
||||
ScheduleHazardRecognizer *
|
||||
CreateTargetPostRAHazardRecognizer(const MachineFunction &MF) const override;
|
||||
|
||||
bool isBasicBlockPrologue(const MachineInstr &MI) const override;
|
||||
};
|
||||
|
||||
namespace AMDGPU {
|
||||
|
|
|
@ -0,0 +1,78 @@
|
|||
; RUN: llc -march=amdgcn -verify-machineinstrs -stress-regalloc=6 < %s | FileCheck %s
|
||||
|
||||
; Inline spiller can decide to move a spill as early as possible in the basic block.
|
||||
; It will skip phis and label, but we also need to make sure it skips instructions
|
||||
; in the basic block prologue which restore exec mask.
|
||||
; Make sure instruction to restore exec mask immediately follows label
|
||||
|
||||
; CHECK-LABEL: {{^}}spill_cfg_position:
|
||||
; CHECK: s_cbranch_execz [[LABEL1:BB[0-9_]+]]
|
||||
; CHECK: {{^}}[[LABEL1]]:
|
||||
; CHECK: s_cbranch_execz [[LABEL2:BB[0-9_]+]]
|
||||
; CHECK: {{^}}[[LABEL2]]:
|
||||
; CHECK-NEXT: s_or_b64 exec
|
||||
; CHECK: buffer_
|
||||
|
||||
define void @spill_cfg_position(i32 addrspace(1)* nocapture %arg) {
|
||||
bb:
|
||||
%tmp1 = tail call i32 @llvm.amdgcn.workitem.id.x() #0
|
||||
%tmp14 = load i32, i32 addrspace(1)* %arg, align 4
|
||||
%tmp15 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 1
|
||||
%tmp16 = load i32, i32 addrspace(1)* %tmp15, align 4
|
||||
%tmp17 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 2
|
||||
%tmp18 = load i32, i32 addrspace(1)* %tmp17, align 4
|
||||
%tmp19 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 3
|
||||
%tmp20 = load i32, i32 addrspace(1)* %tmp19, align 4
|
||||
%tmp21 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 4
|
||||
%tmp22 = load i32, i32 addrspace(1)* %tmp21, align 4
|
||||
%tmp23 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 5
|
||||
%tmp24 = load i32, i32 addrspace(1)* %tmp23, align 4
|
||||
%tmp25 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 6
|
||||
%tmp26 = load i32, i32 addrspace(1)* %tmp25, align 4
|
||||
%tmp27 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 7
|
||||
%tmp28 = load i32, i32 addrspace(1)* %tmp27, align 4
|
||||
%tmp29 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 8
|
||||
%tmp30 = load i32, i32 addrspace(1)* %tmp29, align 4
|
||||
%tmp33 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %tmp1
|
||||
%tmp34 = load i32, i32 addrspace(1)* %tmp33, align 4
|
||||
%tmp35 = icmp eq i32 %tmp34, 0
|
||||
br i1 %tmp35, label %bb44, label %bb36
|
||||
|
||||
bb36: ; preds = %bb
|
||||
%tmp37 = mul nsw i32 %tmp20, %tmp18
|
||||
%tmp38 = add nsw i32 %tmp37, %tmp16
|
||||
%tmp39 = mul nsw i32 %tmp24, %tmp22
|
||||
%tmp40 = add nsw i32 %tmp38, %tmp39
|
||||
%tmp41 = mul nsw i32 %tmp28, %tmp26
|
||||
%tmp42 = add nsw i32 %tmp40, %tmp41
|
||||
%tmp43 = add nsw i32 %tmp42, %tmp30
|
||||
br label %bb52
|
||||
|
||||
bb44: ; preds = %bb
|
||||
%tmp45 = mul nsw i32 %tmp18, %tmp16
|
||||
%tmp46 = mul nsw i32 %tmp22, %tmp20
|
||||
%tmp47 = add nsw i32 %tmp46, %tmp45
|
||||
%tmp48 = mul nsw i32 %tmp26, %tmp24
|
||||
%tmp49 = add nsw i32 %tmp47, %tmp48
|
||||
%tmp50 = mul nsw i32 %tmp30, %tmp28
|
||||
%tmp51 = add nsw i32 %tmp49, %tmp50
|
||||
br label %bb52
|
||||
|
||||
bb52: ; preds = %bb44, %bb36
|
||||
%tmp53 = phi i32 [ %tmp43, %bb36 ], [ %tmp51, %bb44 ]
|
||||
%tmp54 = mul nsw i32 %tmp16, %tmp14
|
||||
%tmp55 = mul nsw i32 %tmp22, %tmp18
|
||||
%tmp56 = mul nsw i32 %tmp24, %tmp20
|
||||
%tmp57 = mul nsw i32 %tmp30, %tmp26
|
||||
%tmp58 = add i32 %tmp55, %tmp54
|
||||
%tmp59 = add i32 %tmp58, %tmp56
|
||||
%tmp60 = add i32 %tmp59, %tmp28
|
||||
%tmp61 = add i32 %tmp60, %tmp57
|
||||
%tmp62 = add i32 %tmp61, %tmp53
|
||||
store i32 %tmp62, i32 addrspace(1)* %tmp33, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
declare i32 @llvm.amdgcn.workitem.id.x() #0
|
||||
|
||||
attributes #0 = { nounwind readnone }
|
Loading…
Reference in New Issue