[AMDGPU] Prevent spills before exec mask is restored

The inline spiller can decide to move a spill as early as possible in the basic block.
It will skip PHIs and labels, but we also need to make sure it skips instructions
in the basic block prologue that restore the exec mask.

Added the isBasicBlockPrologue callback to TargetInstrInfo to detect instructions
that shall be skipped in addition to the common PHIs, labels, etc.

Differential Revision: https://reviews.llvm.org/D27997

llvm-svn: 292554
Stanislav Mekhanoshin 2017-01-20 00:44:31 +00:00
parent e094cc4372
commit 6ec3e3a728
5 changed files with 100 additions and 2 deletions
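
For context, a minimal sketch of the insertion-point pattern this change affects. It is illustrative only, not the actual InlineSpiller code, and the helper name getSpillInsertionPoint is made up:

#include "llvm/CodeGen/MachineBasicBlock.h"

using namespace llvm;

// Code that materializes a spill or reload at the top of a block picks its
// insertion point with the MachineBasicBlock::Skip* helpers. With this
// patch those helpers also step over any instruction the target reports
// via TargetInstrInfo::isBasicBlockPrologue, e.g. the AMDGPU s_or_b64 that
// restores the exec mask after divergent control flow.
static MachineBasicBlock::iterator
getSpillInsertionPoint(MachineBasicBlock &MBB) {
  return MBB.SkipPHIsLabelsAndDebug(MBB.begin());
}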

include/llvm/Target/TargetInstrInfo.h

@@ -1510,6 +1510,13 @@ public:
     return false;
   }
 
+  /// True if the instruction is bound to the top of its basic block and no
+  /// other instructions shall be inserted before it. This can be implemented
+  /// to prevent the register allocator from inserting spills before it.
+  virtual bool isBasicBlockPrologue(const MachineInstr &MI) const {
+    return false;
+  }
+
 private:
   unsigned CallFrameSetupOpcode, CallFrameDestroyOpcode;
   unsigned CatchRetOpcode;

lib/CodeGen/MachineBasicBlock.cpp

@@ -148,8 +148,11 @@ MachineBasicBlock::iterator MachineBasicBlock::getFirstNonPHI() {
 
 MachineBasicBlock::iterator
 MachineBasicBlock::SkipPHIsAndLabels(MachineBasicBlock::iterator I) {
+  const TargetInstrInfo *TII = getParent()->getSubtarget().getInstrInfo();
+
   iterator E = end();
-  while (I != E && (I->isPHI() || I->isPosition()))
+  while (I != E && (I->isPHI() || I->isPosition() ||
+                    TII->isBasicBlockPrologue(*I)))
     ++I;
   // FIXME: This needs to change if we wish to bundle labels
   // inside the bundle.
@@ -160,8 +163,11 @@ MachineBasicBlock::SkipPHIsAndLabels(MachineBasicBlock::iterator I) {
 
 MachineBasicBlock::iterator
 MachineBasicBlock::SkipPHIsLabelsAndDebug(MachineBasicBlock::iterator I) {
+  const TargetInstrInfo *TII = getParent()->getSubtarget().getInstrInfo();
+
   iterator E = end();
-  while (I != E && (I->isPHI() || I->isPosition() || I->isDebugValue()))
+  while (I != E && (I->isPHI() || I->isPosition() || I->isDebugValue() ||
+                    TII->isBasicBlockPrologue(*I)))
     ++I;
   // FIXME: This needs to change if we wish to bundle labels / dbg_values
   // inside the bundle.
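
Callers are otherwise unaffected: a pass that inserts at the top of a block keeps calling the same helper and now automatically lands below the target's prologue. A usage sketch, with the function and its parameters being illustrative rather than code from this commit:

#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/Target/TargetInstrInfo.h"

using namespace llvm;

// Insert a COPY at the first legal point of MBB: after PHIs and labels,
// and, with this patch, after target-designated prologue instructions.
static void insertCopyAtBlockTop(MachineBasicBlock &MBB,
                                 const TargetInstrInfo &TII,
                                 unsigned DstReg, unsigned SrcReg,
                                 const DebugLoc &DL) {
  MachineBasicBlock::iterator IP = MBB.SkipPHIsAndLabels(MBB.begin());
  BuildMI(MBB, IP, DL, TII.get(TargetOpcode::COPY), DstReg).addReg(SrcReg);
}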

lib/Target/AMDGPU/SIInstrInfo.cpp

@@ -3635,3 +3635,8 @@ ScheduleHazardRecognizer *
 SIInstrInfo::CreateTargetPostRAHazardRecognizer(const MachineFunction &MF) const {
   return new GCNHazardRecognizer(MF);
 }
+
+bool SIInstrInfo::isBasicBlockPrologue(const MachineInstr &MI) const {
+  return !MI.isTerminator() && MI.getOpcode() != AMDGPU::COPY &&
+         MI.modifiesRegister(AMDGPU::EXEC, &RI);
+}
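
The predicate restated with the rationale for each clause spelled out. This is editorial commentary, not code from the commit; the standalone function is hypothetical and assumes the AMDGPU enums and the SIRegisterInfo instance that SIInstrInfo.cpp already has in scope:

#include "llvm/CodeGen/MachineInstr.h"

using namespace llvm;

static bool isExecRestorePrologue(const MachineInstr &MI,
                                  const SIRegisterInfo &RI) {
  // Exec writes in terminators implement the end of structured control
  // flow and stay at the block bottom; they never pin the block top.
  if (MI.isTerminator())
    return false;
  // Plain COPYs that clobber EXEC are emitted by the register allocator
  // itself and must remain movable.
  if (MI.getOpcode() == AMDGPU::COPY)
    return false;
  // Anything else that writes the exec mask (typically the s_or_b64 that
  // re-enables lanes after a divergent branch) is block prologue, so
  // spills have to be placed after it.
  return MI.modifiesRegister(AMDGPU::EXEC, &RI);
}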

lib/Target/AMDGPU/SIInstrInfo.h

@@ -731,6 +731,8 @@ public:
 
   ScheduleHazardRecognizer *
   CreateTargetPostRAHazardRecognizer(const MachineFunction &MF) const override;
+
+  bool isBasicBlockPrologue(const MachineInstr &MI) const override;
 };
 
 namespace AMDGPU {

test/CodeGen/AMDGPU/spill-cfg-position.ll

@@ -0,0 +1,78 @@
; RUN: llc -march=amdgcn -verify-machineinstrs -stress-regalloc=6 < %s | FileCheck %s

; The inline spiller can decide to move a spill as early as possible in the basic block.
; It will skip PHIs and labels, but we also need to make sure it skips instructions
; in the basic block prologue that restore the exec mask.
; Make sure the instruction that restores the exec mask immediately follows the label.

; CHECK-LABEL: {{^}}spill_cfg_position:
; CHECK: s_cbranch_execz [[LABEL1:BB[0-9_]+]]
; CHECK: {{^}}[[LABEL1]]:
; CHECK: s_cbranch_execz [[LABEL2:BB[0-9_]+]]
; CHECK: {{^}}[[LABEL2]]:
; CHECK-NEXT: s_or_b64 exec
; CHECK: buffer_

define void @spill_cfg_position(i32 addrspace(1)* nocapture %arg) {
bb:
%tmp1 = tail call i32 @llvm.amdgcn.workitem.id.x() #0
%tmp14 = load i32, i32 addrspace(1)* %arg, align 4
%tmp15 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 1
%tmp16 = load i32, i32 addrspace(1)* %tmp15, align 4
%tmp17 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 2
%tmp18 = load i32, i32 addrspace(1)* %tmp17, align 4
%tmp19 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 3
%tmp20 = load i32, i32 addrspace(1)* %tmp19, align 4
%tmp21 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 4
%tmp22 = load i32, i32 addrspace(1)* %tmp21, align 4
%tmp23 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 5
%tmp24 = load i32, i32 addrspace(1)* %tmp23, align 4
%tmp25 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 6
%tmp26 = load i32, i32 addrspace(1)* %tmp25, align 4
%tmp27 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 7
%tmp28 = load i32, i32 addrspace(1)* %tmp27, align 4
%tmp29 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 8
%tmp30 = load i32, i32 addrspace(1)* %tmp29, align 4
%tmp33 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %tmp1
%tmp34 = load i32, i32 addrspace(1)* %tmp33, align 4
%tmp35 = icmp eq i32 %tmp34, 0
br i1 %tmp35, label %bb44, label %bb36

bb36: ; preds = %bb
%tmp37 = mul nsw i32 %tmp20, %tmp18
%tmp38 = add nsw i32 %tmp37, %tmp16
%tmp39 = mul nsw i32 %tmp24, %tmp22
%tmp40 = add nsw i32 %tmp38, %tmp39
%tmp41 = mul nsw i32 %tmp28, %tmp26
%tmp42 = add nsw i32 %tmp40, %tmp41
%tmp43 = add nsw i32 %tmp42, %tmp30
br label %bb52

bb44: ; preds = %bb
%tmp45 = mul nsw i32 %tmp18, %tmp16
%tmp46 = mul nsw i32 %tmp22, %tmp20
%tmp47 = add nsw i32 %tmp46, %tmp45
%tmp48 = mul nsw i32 %tmp26, %tmp24
%tmp49 = add nsw i32 %tmp47, %tmp48
%tmp50 = mul nsw i32 %tmp30, %tmp28
%tmp51 = add nsw i32 %tmp49, %tmp50
br label %bb52

bb52: ; preds = %bb44, %bb36
%tmp53 = phi i32 [ %tmp43, %bb36 ], [ %tmp51, %bb44 ]
%tmp54 = mul nsw i32 %tmp16, %tmp14
%tmp55 = mul nsw i32 %tmp22, %tmp18
%tmp56 = mul nsw i32 %tmp24, %tmp20
%tmp57 = mul nsw i32 %tmp30, %tmp26
%tmp58 = add i32 %tmp55, %tmp54
%tmp59 = add i32 %tmp58, %tmp56
%tmp60 = add i32 %tmp59, %tmp28
%tmp61 = add i32 %tmp60, %tmp57
%tmp62 = add i32 %tmp61, %tmp53
store i32 %tmp62, i32 addrspace(1)* %tmp33, align 4
ret void
}

declare i32 @llvm.amdgcn.workitem.id.x() #0

attributes #0 = { nounwind readnone }