forked from OSchip/llvm-project
[AMDGPU] Ignore KILLs when forming clauses
KILL instructions are sometimes present and prevent hard clauses from being formed. Fix this by ignoring all meta instructions when forming clauses. Differential Revision: https://reviews.llvm.org/D106042
This commit is contained in:
parent
63bb2d585e
commit
bf980930e5
|
@ -268,6 +268,12 @@ void AMDGPUAsmPrinter::emitInstruction(const MachineInstr *MI) {
|
|||
return;
|
||||
}
|
||||
|
||||
if (MI->isMetaInstruction()) {
|
||||
if (isVerbose())
|
||||
OutStreamer->emitRawComment(" meta instruction");
|
||||
return;
|
||||
}
|
||||
|
||||
MCInst TmpInst;
|
||||
MCInstLowering.lower(MI, TmpInst);
|
||||
EmitToStreamer(*OutStreamer, TmpInst);
|
||||
|
|
|
@ -58,6 +58,8 @@ enum HardClauseType {
|
|||
// Internal instructions, which are allowed in the middle of a hard clause,
|
||||
// except for s_waitcnt.
|
||||
HARDCLAUSE_INTERNAL,
|
||||
// Meta instructions, such as KILL, that do not result in any ISA.
|
||||
HARDCLAUSE_IGNORE,
|
||||
// Instructions that are not allowed in a hard clause: SALU, export, branch,
|
||||
// message, GDS, s_waitcnt and anything else not mentioned above.
|
||||
HARDCLAUSE_ILLEGAL,
|
||||
|
@ -100,6 +102,8 @@ public:
|
|||
// It's safe to treat the rest as illegal.
|
||||
if (MI.getOpcode() == AMDGPU::S_NOP)
|
||||
return HARDCLAUSE_INTERNAL;
|
||||
if (MI.isMetaInstruction())
|
||||
return HARDCLAUSE_IGNORE;
|
||||
return HARDCLAUSE_ILLEGAL;
|
||||
}
|
||||
|
||||
|
@ -112,25 +116,25 @@ public:
|
|||
// The last non-internal instruction in the clause.
|
||||
MachineInstr *Last = nullptr;
|
||||
// The length of the clause including any internal instructions in the
|
||||
// middle or after the end of the clause.
|
||||
// middle (but not at the end) of the clause.
|
||||
unsigned Length = 0;
|
||||
// Internal instructions at the end of a clause should not be included in
|
||||
// the clause. Count them in TrailingInternalLength until a new memory
|
||||
// instruction is added.
|
||||
unsigned TrailingInternalLength = 0;
|
||||
// The base operands of *Last.
|
||||
SmallVector<const MachineOperand *, 4> BaseOps;
|
||||
};
|
||||
|
||||
bool emitClause(const ClauseInfo &CI, const SIInstrInfo *SII) {
|
||||
// Get the size of the clause excluding any internal instructions at the
|
||||
// end.
|
||||
unsigned Size =
|
||||
std::distance(CI.First->getIterator(), CI.Last->getIterator()) + 1;
|
||||
if (Size < 2)
|
||||
if (CI.First == CI.Last)
|
||||
return false;
|
||||
assert(Size <= 64 && "Hard clause is too long!");
|
||||
assert(CI.Length <= 64 && "Hard clause is too long!");
|
||||
|
||||
auto &MBB = *CI.First->getParent();
|
||||
auto ClauseMI =
|
||||
BuildMI(MBB, *CI.First, DebugLoc(), SII->get(AMDGPU::S_CLAUSE))
|
||||
.addImm(Size - 1);
|
||||
.addImm(CI.Length - 1);
|
||||
finalizeBundle(MBB, ClauseMI->getIterator(),
|
||||
std::next(CI.Last->getIterator()));
|
||||
return true;
|
||||
|
@ -168,6 +172,7 @@ public:
|
|||
|
||||
if (CI.Length == 64 ||
|
||||
(CI.Length && Type != HARDCLAUSE_INTERNAL &&
|
||||
Type != HARDCLAUSE_IGNORE &&
|
||||
(Type != CI.Type ||
|
||||
// Note that we lie to shouldClusterMemOps about the size of the
|
||||
// cluster. When shouldClusterMemOps is called from the machine
|
||||
|
@ -182,14 +187,20 @@ public:
|
|||
|
||||
if (CI.Length) {
|
||||
// Extend the current clause.
|
||||
++CI.Length;
|
||||
if (Type != HARDCLAUSE_INTERNAL) {
|
||||
CI.Last = &MI;
|
||||
CI.BaseOps = std::move(BaseOps);
|
||||
if (Type != HARDCLAUSE_IGNORE) {
|
||||
if (Type == HARDCLAUSE_INTERNAL) {
|
||||
++CI.TrailingInternalLength;
|
||||
} else {
|
||||
++CI.Length;
|
||||
CI.Length += CI.TrailingInternalLength;
|
||||
CI.TrailingInternalLength = 0;
|
||||
CI.Last = &MI;
|
||||
CI.BaseOps = std::move(BaseOps);
|
||||
}
|
||||
}
|
||||
} else if (Type <= LAST_REAL_HARDCLAUSE_TYPE) {
|
||||
// Start a new clause.
|
||||
CI = ClauseInfo{Type, &MI, &MI, 1, std::move(BaseOps)};
|
||||
CI = ClauseInfo{Type, &MI, &MI, 1, 0, std::move(BaseOps)};
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -8624,10 +8624,10 @@ define amdgpu_gfx void @test_call_external_void_func_v32i32_i32_inreg(i32) #0 {
|
|||
; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s50, 14
|
||||
; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s51, 15
|
||||
; GFX10-SCRATCH-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX10-SCRATCH-NEXT: s_clause 0x2
|
||||
; GFX10-SCRATCH-NEXT: s_load_dword s2, s[0:1], 0x0
|
||||
; GFX10-SCRATCH-NEXT: ; kill: killed $sgpr0_sgpr1
|
||||
; GFX10-SCRATCH-NEXT: ; kill: killed $sgpr0_sgpr1
|
||||
; GFX10-SCRATCH-NEXT: s_clause 0x1
|
||||
; GFX10-SCRATCH-NEXT: ; meta instruction
|
||||
; GFX10-SCRATCH-NEXT: ; meta instruction
|
||||
; GFX10-SCRATCH-NEXT: s_load_dwordx16 s[36:51], s[0:1], 0x40
|
||||
; GFX10-SCRATCH-NEXT: s_load_dwordx16 s[4:19], s[0:1], 0x0
|
||||
; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
|
||||
|
|
|
@ -34,6 +34,27 @@ body: |
|
|||
$sgpr3 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 4, 0
|
||||
...
|
||||
|
||||
---
|
||||
name: nop3
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $sgpr0_sgpr1
|
||||
; CHECK-LABEL: name: nop3
|
||||
; CHECK: liveins: $sgpr0_sgpr1
|
||||
; CHECK: BUNDLE implicit-def $sgpr2, implicit-def $sgpr2_lo16, implicit-def $sgpr2_hi16, implicit-def $sgpr3, implicit-def $sgpr3_lo16, implicit-def $sgpr3_hi16, implicit $sgpr0_sgpr1 {
|
||||
; CHECK: S_CLAUSE 2
|
||||
; CHECK: $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0
|
||||
; CHECK: S_NOP 2
|
||||
; CHECK: $sgpr3 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 4, 0
|
||||
; CHECK: }
|
||||
; CHECK: S_NOP 2
|
||||
$sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0
|
||||
S_NOP 2
|
||||
$sgpr3 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 4, 0
|
||||
S_NOP 2
|
||||
...
|
||||
|
||||
---
|
||||
name: long_clause
|
||||
tracksRegLiveness: true
|
||||
|
@ -239,3 +260,43 @@ body: |
|
|||
$vgpr14 = IMAGE_SAMPLE_LZ_V1_V2_gfx10 $vgpr5_vgpr6, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9_sgpr10_sgpr11, 1, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from custom "ImageResource")
|
||||
$vgpr20_vgpr21_vgpr22_vgpr23 = IMAGE_SAMPLE_D_V4_V9_nsa_gfx10 $vgpr3, $vgpr8, $vgpr7, $vgpr5, $vgpr4, $vgpr6, $vgpr0, $vgpr2, $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9_sgpr10_sgpr11, 15, 2, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load (s128))
|
||||
...
|
||||
|
||||
---
|
||||
name: kill
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $sgpr0_sgpr1, $sgpr4
|
||||
; CHECK-LABEL: name: kill
|
||||
; CHECK: liveins: $sgpr0_sgpr1, $sgpr4
|
||||
; CHECK: BUNDLE implicit-def $sgpr2, implicit-def $sgpr2_lo16, implicit-def $sgpr2_hi16, implicit-def $sgpr3, implicit-def $sgpr3_lo16, implicit-def $sgpr3_hi16, implicit $sgpr0_sgpr1, implicit undef $sgpr4 {
|
||||
; CHECK: S_CLAUSE 1
|
||||
; CHECK: $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0
|
||||
; CHECK: KILL undef renamable $sgpr4
|
||||
; CHECK: $sgpr3 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 4, 0
|
||||
; CHECK: }
|
||||
$sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0
|
||||
KILL undef renamable $sgpr4
|
||||
$sgpr3 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 4, 0
|
||||
...
|
||||
|
||||
---
|
||||
name: kill2
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $sgpr0_sgpr1, $sgpr4, $sgpr5
|
||||
; CHECK-LABEL: name: kill2
|
||||
; CHECK: liveins: $sgpr0_sgpr1, $sgpr4, $sgpr5
|
||||
; CHECK: BUNDLE implicit-def $sgpr2, implicit-def $sgpr2_lo16, implicit-def $sgpr2_hi16, implicit-def $sgpr3, implicit-def $sgpr3_lo16, implicit-def $sgpr3_hi16, implicit $sgpr0_sgpr1, implicit undef $sgpr4 {
|
||||
; CHECK: S_CLAUSE 1
|
||||
; CHECK: $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0
|
||||
; CHECK: KILL undef renamable $sgpr4
|
||||
; CHECK: $sgpr3 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 4, 0
|
||||
; CHECK: }
|
||||
; CHECK: KILL undef renamable $sgpr5
|
||||
$sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0
|
||||
KILL undef renamable $sgpr4
|
||||
$sgpr3 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 4, 0
|
||||
KILL undef renamable $sgpr5
|
||||
...
|
||||
|
|
Loading…
Reference in New Issue