forked from OSchip/llvm-project
AMDGPU: Fix debug info handling in post-RA bundler
This was allowing debug instructions to break the bundling, which would change scheduling behavior. Bundle debug info / kills inside the bundle. This seems to work OK, although the asm printer doesn't understand these in a bundle. This implicitly expects the memory legalizer to unbundle. It would probably be slightly nicer to move these after. Rewrite the loop to be clearer and make sure we don't end a bundle on a meta instruction, only allow them in between other valid bundle instructions.
This commit is contained in:
parent
3c8bf29f14
commit
c320e8196a
|
@ -48,10 +48,15 @@ private:
|
|||
|
||||
SmallSet<Register, 16> Defs;
|
||||
|
||||
bool isBundleCandidate(const MachineInstr &MI) const;
|
||||
bool isDependentLoad(const MachineInstr &MI) const;
|
||||
|
||||
bool canBundle(const MachineInstr &MI, const MachineInstr &NextMI) const;
|
||||
};
|
||||
|
||||
constexpr uint64_t MemFlags = SIInstrFlags::MTBUF | SIInstrFlags::MUBUF |
|
||||
SIInstrFlags::SMRD | SIInstrFlags::DS |
|
||||
SIInstrFlags::FLAT | SIInstrFlags::MIMG;
|
||||
|
||||
} // End anonymous namespace.
|
||||
|
||||
INITIALIZE_PASS(SIPostRABundler, DEBUG_TYPE, "SI post-RA bundler", false, false)
|
||||
|
@ -80,55 +85,73 @@ bool SIPostRABundler::isDependentLoad(const MachineInstr &MI) const {
|
|||
return false;
|
||||
}
|
||||
|
||||
bool SIPostRABundler::isBundleCandidate(const MachineInstr &MI) const {
|
||||
const uint64_t IMemFlags = MI.getDesc().TSFlags & MemFlags;
|
||||
return IMemFlags != 0 && MI.mayLoadOrStore() && !MI.isBundled();
|
||||
}
|
||||
|
||||
bool SIPostRABundler::canBundle(const MachineInstr &MI,
|
||||
const MachineInstr &NextMI) const {
|
||||
const uint64_t IMemFlags = MI.getDesc().TSFlags & MemFlags;
|
||||
|
||||
return (IMemFlags != 0 && MI.mayLoadOrStore() && !NextMI.isBundled() &&
|
||||
NextMI.mayLoad() == MI.mayLoad() && NextMI.mayStore() == MI.mayStore() &&
|
||||
((NextMI.getDesc().TSFlags & MemFlags) == IMemFlags) &&
|
||||
!isDependentLoad(NextMI));
|
||||
}
|
||||
|
||||
bool SIPostRABundler::runOnMachineFunction(MachineFunction &MF) {
|
||||
if (skipFunction(MF.getFunction()))
|
||||
return false;
|
||||
|
||||
TRI = MF.getSubtarget<GCNSubtarget>().getRegisterInfo();
|
||||
bool Changed = false;
|
||||
const uint64_t MemFlags = SIInstrFlags::MTBUF | SIInstrFlags::MUBUF |
|
||||
SIInstrFlags::SMRD | SIInstrFlags::DS |
|
||||
SIInstrFlags::FLAT | SIInstrFlags::MIMG;
|
||||
|
||||
for (MachineBasicBlock &MBB : MF) {
|
||||
MachineBasicBlock::instr_iterator Next;
|
||||
MachineBasicBlock::instr_iterator B = MBB.instr_begin();
|
||||
MachineBasicBlock::instr_iterator E = MBB.instr_end();
|
||||
|
||||
for (auto I = B; I != E; I = Next) {
|
||||
Next = std::next(I);
|
||||
|
||||
const uint64_t IMemFlags = I->getDesc().TSFlags & MemFlags;
|
||||
|
||||
if (IMemFlags == 0 || I->isBundled() || !I->mayLoadOrStore() ||
|
||||
B->mayLoad() != I->mayLoad() || B->mayStore() != I->mayStore() ||
|
||||
((B->getDesc().TSFlags & MemFlags) != IMemFlags) ||
|
||||
isDependentLoad(*I)) {
|
||||
|
||||
if (B != I) {
|
||||
if (std::next(B) != I) {
|
||||
finalizeBundle(MBB, B, I);
|
||||
Changed = true;
|
||||
}
|
||||
Next = I;
|
||||
}
|
||||
|
||||
B = Next;
|
||||
Defs.clear();
|
||||
if (!isBundleCandidate(*I))
|
||||
continue;
|
||||
|
||||
assert(Defs.empty());
|
||||
|
||||
if (I->getNumExplicitDefs() != 0)
|
||||
Defs.insert(I->defs().begin()->getReg());
|
||||
|
||||
MachineBasicBlock::instr_iterator BundleStart = I;
|
||||
MachineBasicBlock::instr_iterator BundleEnd = I;
|
||||
unsigned ClauseLength = 1;
|
||||
for (I = Next; I != E; I = Next) {
|
||||
Next = std::next(I);
|
||||
|
||||
assert(BundleEnd != I);
|
||||
if (canBundle(*BundleEnd, *I)) {
|
||||
BundleEnd = I;
|
||||
if (I->getNumExplicitDefs() != 0)
|
||||
Defs.insert(I->defs().begin()->getReg());
|
||||
++ClauseLength;
|
||||
} else if (!I->isMetaInstruction()) {
|
||||
// Allow meta instructions in between bundle candidates, but do not
|
||||
// start or end a bundle on one.
|
||||
//
|
||||
// TODO: It may be better to move meta instructions like dbg_value
|
||||
// after the bundle. We're relying on the memory legalizer to unbundle
|
||||
// these.
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (I->getNumExplicitDefs() == 0)
|
||||
continue;
|
||||
Next = std::next(BundleEnd);
|
||||
if (ClauseLength > 1) {
|
||||
Changed = true;
|
||||
finalizeBundle(MBB, BundleStart, Next);
|
||||
}
|
||||
|
||||
Defs.insert(I->defs().begin()->getReg());
|
||||
Defs.clear();
|
||||
}
|
||||
|
||||
if (B != E && std::next(B) != E) {
|
||||
finalizeBundle(MBB, B, E);
|
||||
Changed = true;
|
||||
}
|
||||
|
||||
Defs.clear();
|
||||
}
|
||||
|
||||
return Changed;
|
||||
|
|
|
@ -0,0 +1,55 @@
|
|||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=+xnack -amdgpu-max-memory-clause=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
|
||||
|
||||
; Test the behavior of the post-RA soft clause bundler in the presence
|
||||
; of debug info. The debug info should not interfere with the
|
||||
; bundling, which could result in an observable codegen change.
|
||||
|
||||
define amdgpu_kernel void @dbg_clause(float addrspace(1)* %out, float addrspace(1)* %aptr) !dbg !4 {
|
||||
; GCN-LABEL: dbg_clause:
|
||||
; GCN: ; %bb.0:
|
||||
; GCN-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
|
||||
; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0
|
||||
; GCN-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GCN-NEXT: global_load_dword v1, v0, s[2:3]
|
||||
; GCN-NEXT: ;DEBUG_VALUE: foo:a <- $vgpr1
|
||||
; GCN-NEXT: global_load_dword v2, v0, s[2:3] offset:32
|
||||
; GCN-NEXT: ;DEBUG_VALUE: foo:b <- $vgpr2
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0)
|
||||
; GCN-NEXT: v_add_f32_e32 v1, v1, v2
|
||||
; GCN-NEXT: global_store_dword v0, v1, s[0:1]
|
||||
; GCN-NEXT: s_endpgm
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x()
|
||||
%out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
|
||||
%gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
|
||||
%gep1 = getelementptr float, float addrspace(1)* %gep0, i32 8
|
||||
%a = load float, float addrspace(1)* %gep0, align 4
|
||||
call void @llvm.dbg.value(metadata float %a, metadata !8, metadata !DIExpression()), !dbg !9
|
||||
%b = load float, float addrspace(1)* %gep1, align 4
|
||||
call void @llvm.dbg.value(metadata float %b, metadata !10, metadata !DIExpression()), !dbg !11
|
||||
%fadd = fadd float %a, %b
|
||||
store float %fadd, float addrspace(1)* %out.gep, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
declare i32 @llvm.amdgcn.workitem.id.x() #0
|
||||
declare void @llvm.dbg.value(metadata, metadata, metadata) #1
|
||||
|
||||
attributes #0 = { nounwind readnone speculatable willreturn }
|
||||
attributes #1 = { nofree nosync nounwind readnone speculatable willreturn }
|
||||
|
||||
!llvm.dbg.cu = !{!0}
|
||||
!llvm.module.flags = !{!2, !3}
|
||||
|
||||
!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, isOptimized: true, runtimeVersion: 0, emissionKind: NoDebug)
|
||||
!1 = !DIFile(filename: "/tmp/foo.cl", directory: "/dev/null")
|
||||
!2 = !{i32 2, !"Dwarf Version", i32 4}
|
||||
!3 = !{i32 2, !"Debug Info Version", i32 3}
|
||||
!4 = distinct !DISubprogram(name: "foo", scope: !1, file: !1, line: 1, type: !5, scopeLine: 2, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0)
|
||||
!5 = !DISubroutineType(types: !6)
|
||||
!6 = !{null, !7}
|
||||
!7 = !DIBasicType(name: "float", size: 32, align: 32)
|
||||
!8 = !DILocalVariable(name: "a", arg: 1, scope: !4, file: !1, line: 1)
|
||||
!9 = !DILocation(line: 1, column: 42, scope: !4)
|
||||
!10 = !DILocalVariable(name: "b", arg: 2, scope: !4, file: !1, line: 2)
|
||||
!11 = !DILocation(line: 2, column: 42, scope: !4)
|
|
@ -56,12 +56,12 @@ body: |
|
|||
; GCN: BUFFER_STORE_DWORD_ADDR64 $vgpr0, $vgpr2_vgpr3, undef $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
; GCN: }
|
||||
; GCN: BUNDLE implicit-def $vgpr2, implicit-def $vgpr2_lo16, implicit-def $vgpr2_hi16, implicit-def $vgpr3, implicit-def $vgpr3_lo16, implicit-def $vgpr3_hi16, implicit undef $vgpr4_vgpr5_vgpr6_vgpr7, implicit undef $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, implicit $exec {
|
||||
; GCN: $vgpr2 = IMAGE_LOAD_V1_V4 undef $vgpr4_vgpr5_vgpr6_vgpr7, undef $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 2, -1, 0, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
; GCN: $vgpr3 = IMAGE_LOAD_V1_V4 undef $vgpr4_vgpr5_vgpr6_vgpr7, undef $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 2, -1, 0, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
; GCN: $vgpr2 = IMAGE_LOAD_V1_V4 undef $vgpr4_vgpr5_vgpr6_vgpr7, undef $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 2, -1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4)
|
||||
; GCN: $vgpr3 = IMAGE_LOAD_V1_V4 undef $vgpr4_vgpr5_vgpr6_vgpr7, undef $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 2, -1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4)
|
||||
; GCN: }
|
||||
; GCN: BUNDLE implicit undef $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1, implicit undef $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, implicit $exec {
|
||||
; GCN: IMAGE_STORE_V4_V2 undef $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr0_vgpr1, undef $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 15, -1, 1, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
; GCN: IMAGE_STORE_V4_V2 undef $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr0_vgpr1, undef $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 15, -1, 1, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
; GCN: IMAGE_STORE_V4_V2 undef $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr0_vgpr1, undef $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 15, -1, 1, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 16)
|
||||
; GCN: IMAGE_STORE_V4_V2 undef $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr0_vgpr1, undef $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 15, -1, 1, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 16)
|
||||
; GCN: }
|
||||
; GCN: S_NOP 0
|
||||
; GCN: $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71 = S_LOAD_DWORDX8_IMM undef $sgpr10_sgpr11, 464, 0, 0
|
||||
|
@ -112,3 +112,111 @@ body: |
|
|||
$vgpr2 = DS_READ_B32_gfx9 $vgpr0, 0, 0, implicit $exec
|
||||
$vgpr3 = DS_READ_B32_gfx9 $vgpr1, 0, 0, implicit $exec
|
||||
...
|
||||
|
||||
# Middle dbg_value should be bundled
|
||||
---
|
||||
name: bundle_dbg_value_0
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr3_vgpr4, $vgpr5_vgpr6
|
||||
; GCN-LABEL: name: bundle_dbg_value_0
|
||||
; GCN: liveins: $vgpr3_vgpr4, $vgpr5_vgpr6
|
||||
; GCN: BUNDLE implicit-def $vgpr0, implicit-def $vgpr0_lo16, implicit-def $vgpr0_hi16, implicit-def $vgpr1, implicit-def $vgpr1_lo16, implicit-def $vgpr1_hi16, implicit $vgpr3_vgpr4, implicit $exec, implicit $vgpr5_vgpr6 {
|
||||
; GCN: $vgpr0 = GLOBAL_LOAD_DWORD $vgpr3_vgpr4, 0, 0, 0, 0, implicit $exec
|
||||
; GCN: DBG_VALUE internal $vgpr0, 0, 0
|
||||
; GCN: $vgpr1 = GLOBAL_LOAD_DWORD $vgpr5_vgpr6, 0, 0, 0, 0, implicit $exec
|
||||
; GCN: }
|
||||
$vgpr0 = GLOBAL_LOAD_DWORD $vgpr3_vgpr4, 0, 0, 0, 0, implicit $exec
|
||||
DBG_VALUE $vgpr0, 0, 0
|
||||
$vgpr1 = GLOBAL_LOAD_DWORD $vgpr5_vgpr6, 0, 0, 0, 0, implicit $exec
|
||||
|
||||
...
|
||||
|
||||
# Middle dbg_value should be bundled
|
||||
---
|
||||
name: bundle_dbg_value_1
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr3_vgpr4, $vgpr5_vgpr6, $vgpr1
|
||||
; GCN-LABEL: name: bundle_dbg_value_1
|
||||
; GCN: liveins: $vgpr3_vgpr4, $vgpr5_vgpr6, $vgpr1
|
||||
; GCN: BUNDLE implicit-def $vgpr0, implicit-def $vgpr0_lo16, implicit-def $vgpr0_hi16, implicit-def $vgpr2, implicit-def $vgpr2_lo16, implicit-def $vgpr2_hi16, implicit $vgpr3_vgpr4, implicit $exec, implicit $vgpr1, implicit $vgpr5_vgpr6 {
|
||||
; GCN: $vgpr0 = GLOBAL_LOAD_DWORD $vgpr3_vgpr4, 0, 0, 0, 0, implicit $exec
|
||||
; GCN: DBG_VALUE internal $vgpr0, 0, 0
|
||||
; GCN: DBG_VALUE $vgpr1, 0, 0
|
||||
; GCN: $vgpr2 = GLOBAL_LOAD_DWORD $vgpr5_vgpr6, 0, 0, 0, 0, implicit $exec
|
||||
; GCN: }
|
||||
; GCN: DBG_VALUE $vgpr2, 0, 0
|
||||
$vgpr0 = GLOBAL_LOAD_DWORD $vgpr3_vgpr4, 0, 0, 0, 0, implicit $exec
|
||||
DBG_VALUE $vgpr0, 0, 0
|
||||
DBG_VALUE $vgpr1, 0, 0
|
||||
$vgpr2 = GLOBAL_LOAD_DWORD $vgpr5_vgpr6, 0, 0, 0, 0, implicit $exec
|
||||
DBG_VALUE $vgpr2, 0, 0
|
||||
...
|
||||
|
||||
# Starting and ending dbg_values should not be in the bundle
|
||||
---
|
||||
name: bundle_dbg_value_2
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr3_vgpr4, $vgpr5_vgpr6, $vgpr1
|
||||
; GCN-LABEL: name: bundle_dbg_value_2
|
||||
; GCN: liveins: $vgpr3_vgpr4, $vgpr5_vgpr6, $vgpr1
|
||||
; GCN: DBG_VALUE $vgpr1, 0, 0
|
||||
; GCN: BUNDLE implicit-def $vgpr0, implicit-def $vgpr0_lo16, implicit-def $vgpr0_hi16, implicit-def $vgpr2, implicit-def $vgpr2_lo16, implicit-def $vgpr2_hi16, implicit $vgpr3_vgpr4, implicit $exec, implicit $vgpr5_vgpr6 {
|
||||
; GCN: $vgpr0 = GLOBAL_LOAD_DWORD $vgpr3_vgpr4, 0, 0, 0, 0, implicit $exec
|
||||
; GCN: DBG_VALUE internal $vgpr0, 0, 0
|
||||
; GCN: $vgpr2 = GLOBAL_LOAD_DWORD $vgpr5_vgpr6, 0, 0, 0, 0, implicit $exec
|
||||
; GCN: }
|
||||
; GCN: DBG_VALUE $vgpr2, 0, 0
|
||||
DBG_VALUE $vgpr1, 0, 0
|
||||
$vgpr0 = GLOBAL_LOAD_DWORD $vgpr3_vgpr4, 0, 0, 0, 0, implicit $exec
|
||||
DBG_VALUE $vgpr0, 0, 0
|
||||
$vgpr2 = GLOBAL_LOAD_DWORD $vgpr5_vgpr6, 0, 0, 0, 0, implicit $exec
|
||||
DBG_VALUE $vgpr2, 0, 0
|
||||
...
|
||||
|
||||
---
|
||||
name: bundle_kill
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr3_vgpr4, $vgpr5_vgpr6
|
||||
; GCN-LABEL: name: bundle_kill
|
||||
; GCN: liveins: $vgpr3_vgpr4, $vgpr5_vgpr6
|
||||
; GCN: $vgpr1 = V_MOV_B32_e32 0, implicit $exec
|
||||
; GCN: BUNDLE implicit-def $vgpr0, implicit-def $vgpr0_lo16, implicit-def $vgpr0_hi16, implicit-def $vgpr2, implicit-def $vgpr2_lo16, implicit-def $vgpr2_hi16, implicit $vgpr3_vgpr4, implicit $exec, implicit $vgpr1, implicit $vgpr5_vgpr6 {
|
||||
; GCN: $vgpr0 = GLOBAL_LOAD_DWORD $vgpr3_vgpr4, 0, 0, 0, 0, implicit $exec
|
||||
; GCN: KILL $vgpr1
|
||||
; GCN: $vgpr2 = GLOBAL_LOAD_DWORD $vgpr5_vgpr6, 0, 0, 0, 0, implicit $exec
|
||||
; GCN: }
|
||||
$vgpr1 = V_MOV_B32_e32 0, implicit $exec
|
||||
$vgpr0 = GLOBAL_LOAD_DWORD $vgpr3_vgpr4, 0, 0, 0, 0, implicit $exec
|
||||
KILL $vgpr1
|
||||
$vgpr2 = GLOBAL_LOAD_DWORD $vgpr5_vgpr6, 0, 0, 0, 0, implicit $exec
|
||||
|
||||
...
|
||||
|
||||
---
|
||||
name: bundle_kill_def_in_bundle
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr3_vgpr4, $vgpr5_vgpr6
|
||||
; GCN-LABEL: name: bundle_kill_def_in_bundle
|
||||
; GCN: liveins: $vgpr3_vgpr4, $vgpr5_vgpr6
|
||||
; GCN: $vgpr1 = V_MOV_B32_e32 0, implicit $exec
|
||||
; GCN: BUNDLE implicit-def $vgpr0, implicit-def $vgpr0_lo16, implicit-def $vgpr0_hi16, implicit-def $vgpr2, implicit-def $vgpr2_lo16, implicit-def $vgpr2_hi16, implicit $vgpr3_vgpr4, implicit $exec, implicit $vgpr5_vgpr6 {
|
||||
; GCN: $vgpr0 = GLOBAL_LOAD_DWORD $vgpr3_vgpr4, 0, 0, 0, 0, implicit $exec
|
||||
; GCN: KILL internal $vgpr0
|
||||
; GCN: $vgpr2 = GLOBAL_LOAD_DWORD $vgpr5_vgpr6, 0, 0, 0, 0, implicit $exec
|
||||
; GCN: }
|
||||
$vgpr1 = V_MOV_B32_e32 0, implicit $exec
|
||||
$vgpr0 = GLOBAL_LOAD_DWORD $vgpr3_vgpr4, 0, 0, 0, 0, implicit $exec
|
||||
KILL $vgpr0
|
||||
$vgpr2 = GLOBAL_LOAD_DWORD $vgpr5_vgpr6, 0, 0, 0, 0, implicit $exec
|
||||
|
||||
...
|
||||
|
|
Loading…
Reference in New Issue