forked from OSchip/llvm-project
[AMDGPU] Ignore KILLs when forming clauses
KILL instructions are sometimes present and prevent hard clauses from being formed. Fix this by ignoring all meta instructions in clauses. Differential Revision: https://reviews.llvm.org/D106042
This commit is contained in:
parent
63bb2d585e
commit
bf980930e5
|
@ -268,6 +268,12 @@ void AMDGPUAsmPrinter::emitInstruction(const MachineInstr *MI) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (MI->isMetaInstruction()) {
|
||||||
|
if (isVerbose())
|
||||||
|
OutStreamer->emitRawComment(" meta instruction");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
MCInst TmpInst;
|
MCInst TmpInst;
|
||||||
MCInstLowering.lower(MI, TmpInst);
|
MCInstLowering.lower(MI, TmpInst);
|
||||||
EmitToStreamer(*OutStreamer, TmpInst);
|
EmitToStreamer(*OutStreamer, TmpInst);
|
||||||
|
|
|
@ -58,6 +58,8 @@ enum HardClauseType {
|
||||||
// Internal instructions, which are allowed in the middle of a hard clause,
|
// Internal instructions, which are allowed in the middle of a hard clause,
|
||||||
// except for s_waitcnt.
|
// except for s_waitcnt.
|
||||||
HARDCLAUSE_INTERNAL,
|
HARDCLAUSE_INTERNAL,
|
||||||
|
// Meta instructions that do not result in any ISA, like KILL.
|
||||||
|
HARDCLAUSE_IGNORE,
|
||||||
// Instructions that are not allowed in a hard clause: SALU, export, branch,
|
// Instructions that are not allowed in a hard clause: SALU, export, branch,
|
||||||
// message, GDS, s_waitcnt and anything else not mentioned above.
|
// message, GDS, s_waitcnt and anything else not mentioned above.
|
||||||
HARDCLAUSE_ILLEGAL,
|
HARDCLAUSE_ILLEGAL,
|
||||||
|
@ -100,6 +102,8 @@ public:
|
||||||
// It's safe to treat the rest as illegal.
|
// It's safe to treat the rest as illegal.
|
||||||
if (MI.getOpcode() == AMDGPU::S_NOP)
|
if (MI.getOpcode() == AMDGPU::S_NOP)
|
||||||
return HARDCLAUSE_INTERNAL;
|
return HARDCLAUSE_INTERNAL;
|
||||||
|
if (MI.isMetaInstruction())
|
||||||
|
return HARDCLAUSE_IGNORE;
|
||||||
return HARDCLAUSE_ILLEGAL;
|
return HARDCLAUSE_ILLEGAL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -112,25 +116,25 @@ public:
|
||||||
// The last non-internal instruction in the clause.
|
// The last non-internal instruction in the clause.
|
||||||
MachineInstr *Last = nullptr;
|
MachineInstr *Last = nullptr;
|
||||||
// The length of the clause including any internal instructions in the
|
// The length of the clause including any internal instructions in the
|
||||||
// middle or after the end of the clause.
|
// middle (but not at the end) of the clause.
|
||||||
unsigned Length = 0;
|
unsigned Length = 0;
|
||||||
|
// Internal instructions at the end of a clause should not be included in
|
||||||
|
// the clause. Count them in TrailingInternalLength until a new memory
|
||||||
|
// instruction is added.
|
||||||
|
unsigned TrailingInternalLength = 0;
|
||||||
// The base operands of *Last.
|
// The base operands of *Last.
|
||||||
SmallVector<const MachineOperand *, 4> BaseOps;
|
SmallVector<const MachineOperand *, 4> BaseOps;
|
||||||
};
|
};
|
||||||
|
|
||||||
bool emitClause(const ClauseInfo &CI, const SIInstrInfo *SII) {
|
bool emitClause(const ClauseInfo &CI, const SIInstrInfo *SII) {
|
||||||
// Get the size of the clause excluding any internal instructions at the
|
if (CI.First == CI.Last)
|
||||||
// end.
|
|
||||||
unsigned Size =
|
|
||||||
std::distance(CI.First->getIterator(), CI.Last->getIterator()) + 1;
|
|
||||||
if (Size < 2)
|
|
||||||
return false;
|
return false;
|
||||||
assert(Size <= 64 && "Hard clause is too long!");
|
assert(CI.Length <= 64 && "Hard clause is too long!");
|
||||||
|
|
||||||
auto &MBB = *CI.First->getParent();
|
auto &MBB = *CI.First->getParent();
|
||||||
auto ClauseMI =
|
auto ClauseMI =
|
||||||
BuildMI(MBB, *CI.First, DebugLoc(), SII->get(AMDGPU::S_CLAUSE))
|
BuildMI(MBB, *CI.First, DebugLoc(), SII->get(AMDGPU::S_CLAUSE))
|
||||||
.addImm(Size - 1);
|
.addImm(CI.Length - 1);
|
||||||
finalizeBundle(MBB, ClauseMI->getIterator(),
|
finalizeBundle(MBB, ClauseMI->getIterator(),
|
||||||
std::next(CI.Last->getIterator()));
|
std::next(CI.Last->getIterator()));
|
||||||
return true;
|
return true;
|
||||||
|
@ -168,6 +172,7 @@ public:
|
||||||
|
|
||||||
if (CI.Length == 64 ||
|
if (CI.Length == 64 ||
|
||||||
(CI.Length && Type != HARDCLAUSE_INTERNAL &&
|
(CI.Length && Type != HARDCLAUSE_INTERNAL &&
|
||||||
|
Type != HARDCLAUSE_IGNORE &&
|
||||||
(Type != CI.Type ||
|
(Type != CI.Type ||
|
||||||
// Note that we lie to shouldClusterMemOps about the size of the
|
// Note that we lie to shouldClusterMemOps about the size of the
|
||||||
// cluster. When shouldClusterMemOps is called from the machine
|
// cluster. When shouldClusterMemOps is called from the machine
|
||||||
|
@ -182,14 +187,20 @@ public:
|
||||||
|
|
||||||
if (CI.Length) {
|
if (CI.Length) {
|
||||||
// Extend the current clause.
|
// Extend the current clause.
|
||||||
++CI.Length;
|
if (Type != HARDCLAUSE_IGNORE) {
|
||||||
if (Type != HARDCLAUSE_INTERNAL) {
|
if (Type == HARDCLAUSE_INTERNAL) {
|
||||||
CI.Last = &MI;
|
++CI.TrailingInternalLength;
|
||||||
CI.BaseOps = std::move(BaseOps);
|
} else {
|
||||||
|
++CI.Length;
|
||||||
|
CI.Length += CI.TrailingInternalLength;
|
||||||
|
CI.TrailingInternalLength = 0;
|
||||||
|
CI.Last = &MI;
|
||||||
|
CI.BaseOps = std::move(BaseOps);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
} else if (Type <= LAST_REAL_HARDCLAUSE_TYPE) {
|
} else if (Type <= LAST_REAL_HARDCLAUSE_TYPE) {
|
||||||
// Start a new clause.
|
// Start a new clause.
|
||||||
CI = ClauseInfo{Type, &MI, &MI, 1, std::move(BaseOps)};
|
CI = ClauseInfo{Type, &MI, &MI, 1, 0, std::move(BaseOps)};
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -8624,10 +8624,10 @@ define amdgpu_gfx void @test_call_external_void_func_v32i32_i32_inreg(i32) #0 {
|
||||||
; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s50, 14
|
; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s50, 14
|
||||||
; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s51, 15
|
; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s51, 15
|
||||||
; GFX10-SCRATCH-NEXT: s_waitcnt lgkmcnt(0)
|
; GFX10-SCRATCH-NEXT: s_waitcnt lgkmcnt(0)
|
||||||
|
; GFX10-SCRATCH-NEXT: s_clause 0x2
|
||||||
; GFX10-SCRATCH-NEXT: s_load_dword s2, s[0:1], 0x0
|
; GFX10-SCRATCH-NEXT: s_load_dword s2, s[0:1], 0x0
|
||||||
; GFX10-SCRATCH-NEXT: ; kill: killed $sgpr0_sgpr1
|
; GFX10-SCRATCH-NEXT: ; meta instruction
|
||||||
; GFX10-SCRATCH-NEXT: ; kill: killed $sgpr0_sgpr1
|
; GFX10-SCRATCH-NEXT: ; meta instruction
|
||||||
; GFX10-SCRATCH-NEXT: s_clause 0x1
|
|
||||||
; GFX10-SCRATCH-NEXT: s_load_dwordx16 s[36:51], s[0:1], 0x40
|
; GFX10-SCRATCH-NEXT: s_load_dwordx16 s[36:51], s[0:1], 0x40
|
||||||
; GFX10-SCRATCH-NEXT: s_load_dwordx16 s[4:19], s[0:1], 0x0
|
; GFX10-SCRATCH-NEXT: s_load_dwordx16 s[4:19], s[0:1], 0x0
|
||||||
; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
|
; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
|
||||||
|
|
|
@ -34,6 +34,27 @@ body: |
|
||||||
$sgpr3 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 4, 0
|
$sgpr3 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 4, 0
|
||||||
...
|
...
|
||||||
|
|
||||||
|
---
|
||||||
|
name: nop3
|
||||||
|
tracksRegLiveness: true
|
||||||
|
body: |
|
||||||
|
bb.0:
|
||||||
|
liveins: $sgpr0_sgpr1
|
||||||
|
; CHECK-LABEL: name: nop3
|
||||||
|
; CHECK: liveins: $sgpr0_sgpr1
|
||||||
|
; CHECK: BUNDLE implicit-def $sgpr2, implicit-def $sgpr2_lo16, implicit-def $sgpr2_hi16, implicit-def $sgpr3, implicit-def $sgpr3_lo16, implicit-def $sgpr3_hi16, implicit $sgpr0_sgpr1 {
|
||||||
|
; CHECK: S_CLAUSE 2
|
||||||
|
; CHECK: $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0
|
||||||
|
; CHECK: S_NOP 2
|
||||||
|
; CHECK: $sgpr3 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 4, 0
|
||||||
|
; CHECK: }
|
||||||
|
; CHECK: S_NOP 2
|
||||||
|
$sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0
|
||||||
|
S_NOP 2
|
||||||
|
$sgpr3 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 4, 0
|
||||||
|
S_NOP 2
|
||||||
|
...
|
||||||
|
|
||||||
---
|
---
|
||||||
name: long_clause
|
name: long_clause
|
||||||
tracksRegLiveness: true
|
tracksRegLiveness: true
|
||||||
|
@ -239,3 +260,43 @@ body: |
|
||||||
$vgpr14 = IMAGE_SAMPLE_LZ_V1_V2_gfx10 $vgpr5_vgpr6, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9_sgpr10_sgpr11, 1, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from custom "ImageResource")
|
$vgpr14 = IMAGE_SAMPLE_LZ_V1_V2_gfx10 $vgpr5_vgpr6, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9_sgpr10_sgpr11, 1, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from custom "ImageResource")
|
||||||
$vgpr20_vgpr21_vgpr22_vgpr23 = IMAGE_SAMPLE_D_V4_V9_nsa_gfx10 $vgpr3, $vgpr8, $vgpr7, $vgpr5, $vgpr4, $vgpr6, $vgpr0, $vgpr2, $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9_sgpr10_sgpr11, 15, 2, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load (s128))
|
$vgpr20_vgpr21_vgpr22_vgpr23 = IMAGE_SAMPLE_D_V4_V9_nsa_gfx10 $vgpr3, $vgpr8, $vgpr7, $vgpr5, $vgpr4, $vgpr6, $vgpr0, $vgpr2, $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9_sgpr10_sgpr11, 15, 2, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load (s128))
|
||||||
...
|
...
|
||||||
|
|
||||||
|
---
|
||||||
|
name: kill
|
||||||
|
tracksRegLiveness: true
|
||||||
|
body: |
|
||||||
|
bb.0:
|
||||||
|
liveins: $sgpr0_sgpr1, $sgpr4
|
||||||
|
; CHECK-LABEL: name: kill
|
||||||
|
; CHECK: liveins: $sgpr0_sgpr1, $sgpr4
|
||||||
|
; CHECK: BUNDLE implicit-def $sgpr2, implicit-def $sgpr2_lo16, implicit-def $sgpr2_hi16, implicit-def $sgpr3, implicit-def $sgpr3_lo16, implicit-def $sgpr3_hi16, implicit $sgpr0_sgpr1, implicit undef $sgpr4 {
|
||||||
|
; CHECK: S_CLAUSE 1
|
||||||
|
; CHECK: $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0
|
||||||
|
; CHECK: KILL undef renamable $sgpr4
|
||||||
|
; CHECK: $sgpr3 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 4, 0
|
||||||
|
; CHECK: }
|
||||||
|
$sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0
|
||||||
|
KILL undef renamable $sgpr4
|
||||||
|
$sgpr3 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 4, 0
|
||||||
|
...
|
||||||
|
|
||||||
|
---
|
||||||
|
name: kill2
|
||||||
|
tracksRegLiveness: true
|
||||||
|
body: |
|
||||||
|
bb.0:
|
||||||
|
liveins: $sgpr0_sgpr1, $sgpr4, $sgpr5
|
||||||
|
; CHECK-LABEL: name: kill2
|
||||||
|
; CHECK: liveins: $sgpr0_sgpr1, $sgpr4, $sgpr5
|
||||||
|
; CHECK: BUNDLE implicit-def $sgpr2, implicit-def $sgpr2_lo16, implicit-def $sgpr2_hi16, implicit-def $sgpr3, implicit-def $sgpr3_lo16, implicit-def $sgpr3_hi16, implicit $sgpr0_sgpr1, implicit undef $sgpr4 {
|
||||||
|
; CHECK: S_CLAUSE 1
|
||||||
|
; CHECK: $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0
|
||||||
|
; CHECK: KILL undef renamable $sgpr4
|
||||||
|
; CHECK: $sgpr3 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 4, 0
|
||||||
|
; CHECK: }
|
||||||
|
; CHECK: KILL undef renamable $sgpr5
|
||||||
|
$sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0
|
||||||
|
KILL undef renamable $sgpr4
|
||||||
|
$sgpr3 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 4, 0
|
||||||
|
KILL undef renamable $sgpr5
|
||||||
|
...
|
||||||
|
|
Loading…
Reference in New Issue