TableGen/GlobalISel: Fix constraining REG_SEQUENCE operands

This was hitting the default instruction constraint code which uses
the register classes in the instruction def, which REG_SEQUENCE does
not have.

Fixes not constraining the register class for AMDGPU fneg/fabs
patterns, which would fail when the use was another generic,
unconstrained instruction.

Another oddity I noticed is that the temporary registers are created
with an unnecessary, but incorrect 16-bit LLT but this shouldn't
matter.

I'm also still unclear why root and sub-instructions have to be
handled differently.
This commit is contained in:
Matt Arsenault 2020-04-07 09:32:51 -04:00 committed by Matt Arsenault
parent 229e392b4e
commit cb5dc3765b
4 changed files with 95 additions and 28 deletions

View File

@ -201,11 +201,11 @@ body: |
; GCN-LABEL: name: fabs_s64_ss
; GCN: liveins: $sgpr0_sgpr1
; GCN: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1
; GCN: [[FABS:%[0-9]+]]:sreg_64(s64) = G_FABS [[COPY]]
; GCN: $sgpr0_sgpr1 = COPY [[FABS]](s64)
; GCN: [[FABS:%[0-9]+]]:sgpr(s64) = G_FABS [[COPY]]
; GCN: S_ENDPGM 0, implicit [[FABS]](s64)
%0:sgpr(s64) = COPY $sgpr0_sgpr1
%1:sgpr(s64) = G_FABS %0
$sgpr0_sgpr1 = COPY %1
S_ENDPGM 0, implicit %1
...
---
@ -225,10 +225,10 @@ body: |
; GCN: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY1]], [[V_MOV_B32_e32_]], implicit $exec
; GCN: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
; GCN: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[V_AND_B32_e64_]], %subreg.sub1
; GCN: $vgpr0_vgpr1 = COPY [[REG_SEQUENCE]]
; GCN: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
%0:vgpr(s64) = COPY $vgpr0_vgpr1
%1:vgpr(s64) = G_FABS %0
$vgpr0_vgpr1 = COPY %1
S_ENDPGM 0, implicit %1
...
---
@ -243,9 +243,9 @@ body: |
; GCN-LABEL: name: fabs_s64_vs
; GCN: liveins: $sgpr0_sgpr1
; GCN: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1
; GCN: [[FABS:%[0-9]+]]:vreg_64(s64) = G_FABS [[COPY]]
; GCN: $vgpr0_vgpr1 = COPY [[FABS]](s64)
; GCN: [[FABS:%[0-9]+]]:vgpr(s64) = G_FABS [[COPY]]
; GCN: S_ENDPGM 0, implicit [[FABS]](s64)
%0:sgpr(s64) = COPY $sgpr0_sgpr1
%1:vgpr(s64) = G_FABS %0
$vgpr0_vgpr1 = COPY %1
S_ENDPGM 0, implicit %1
...

View File

@ -206,10 +206,10 @@ body: |
; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648
; GCN: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[COPY2]], [[S_MOV_B32_]], implicit-def $scc
; GCN: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[S_XOR_B32_]], %subreg.sub1
; GCN: $sgpr0_sgpr1 = COPY [[REG_SEQUENCE]]
; GCN: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
%0:sgpr(s64) = COPY $sgpr0_sgpr1
%1:sgpr(s64) = G_FNEG %0
$sgpr0_sgpr1 = COPY %1
S_ENDPGM 0, implicit %1
...
---
@ -229,10 +229,10 @@ body: |
; GCN: [[V_XOR_B32_e32_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e32 [[COPY1]], [[V_MOV_B32_e32_]], implicit $exec
; GCN: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
; GCN: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[V_XOR_B32_e32_]], %subreg.sub1
; GCN: $vgpr0_vgpr1 = COPY [[REG_SEQUENCE]]
; GCN: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
%0:vgpr(s64) = COPY $vgpr0_vgpr1
%1:vgpr(s64) = G_FNEG %0
$vgpr0_vgpr1 = COPY %1
S_ENDPGM 0, implicit %1
...
---
@ -247,11 +247,12 @@ body: |
; GCN-LABEL: name: fneg_s64_vs
; GCN: liveins: $sgpr0_sgpr1
; GCN: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1
; GCN: [[FNEG:%[0-9]+]]:vreg_64(s64) = G_FNEG [[COPY]]
; GCN: $vgpr0_vgpr1 = COPY [[FNEG]](s64)
; GCN: [[FNEG:%[0-9]+]]:vgpr(s64) = G_FNEG [[COPY]]
; GCN: S_ENDPGM 0, implicit [[FNEG]](s64)
%0:sgpr(s64) = COPY $sgpr0_sgpr1
%1:vgpr(s64) = G_FNEG %0
$vgpr0_vgpr1 = COPY %1
S_ENDPGM 0, implicit %1
...
---
@ -268,11 +269,11 @@ body: |
; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648
; GCN: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc
; GCN: $sgpr0 = COPY [[S_OR_B32_]]
; GCN: S_ENDPGM 0, implicit [[S_OR_B32_]]
%0:sgpr(s32) = COPY $sgpr0
%1:sgpr(s32) = G_FABS %0
%2:sgpr(s32) = G_FNEG %1
$sgpr0 = COPY %2
S_ENDPGM 0, implicit %2
...
---
@ -289,11 +290,11 @@ body: |
; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648
; GCN: [[V_XOR_B32_e32_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e32 [[S_MOV_B32_]], [[COPY]], implicit $exec
; GCN: $vgpr0 = COPY [[V_XOR_B32_e32_]]
; GCN: S_ENDPGM 0, implicit [[V_XOR_B32_e32_]]
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = G_FABS %0
%2:vgpr(s32) = G_FNEG %0
$vgpr0 = COPY %2
S_ENDPGM 0, implicit %2
...
---
@ -311,11 +312,11 @@ body: |
; GCN: [[FABS:%[0-9]+]]:vgpr_32(s32) = G_FABS [[COPY]]
; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32(s16) = S_MOV_B32 2147483648
; GCN: [[V_XOR_B32_e32_:%[0-9]+]]:vgpr_32(s32) = V_XOR_B32_e32 [[S_MOV_B32_]](s16), [[FABS]](s32), implicit $exec
; GCN: $vgpr0 = COPY [[V_XOR_B32_e32_]](s32)
; GCN: S_ENDPGM 0, implicit [[V_XOR_B32_e32_]](s32)
%0:sgpr(s32) = COPY $sgpr0
%1:vgpr(s32) = G_FABS %0
%2:vgpr(s32) = G_FNEG %1
$vgpr0 = COPY %2
S_ENDPGM 0, implicit %2
...
---
@ -472,11 +473,11 @@ body: |
; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648
; GCN: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[COPY2]], [[S_MOV_B32_]], implicit-def $scc
; GCN: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[S_OR_B32_]], %subreg.sub1
; GCN: $sgpr0_sgpr1 = COPY [[REG_SEQUENCE]]
; GCN: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
%0:sgpr(s64) = COPY $sgpr0_sgpr1
%1:sgpr(s64) = G_FABS %0
%2:sgpr(s64) = G_FNEG %1
$sgpr0_sgpr1 = COPY %2
S_ENDPGM 0, implicit %2
...
---
@ -496,11 +497,11 @@ body: |
; GCN: [[V_OR_B32_e32_:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[COPY1]], [[V_MOV_B32_e32_]], implicit $exec
; GCN: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
; GCN: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[V_OR_B32_e32_]], %subreg.sub1
; GCN: $vgpr0_vgpr1 = COPY [[REG_SEQUENCE]]
; GCN: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
%0:vgpr(s64) = COPY $vgpr0_vgpr1
%1:vgpr(s64) = G_FABS %0
%2:vgpr(s64) = G_FNEG %1
$vgpr0_vgpr1 = COPY %2
S_ENDPGM 0, implicit %2
...
---
@ -521,9 +522,9 @@ body: |
; GCN: [[V_XOR_B32_e32_:%[0-9]+]]:vgpr_32(s16) = V_XOR_B32_e32 [[COPY1]](s32), [[V_MOV_B32_e32_]](s32), implicit $exec
; GCN: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY [[FABS]].sub0(s64)
; GCN: [[REG_SEQUENCE:%[0-9]+]]:vreg_64(s64) = REG_SEQUENCE [[COPY2]](s32), %subreg.sub0, [[V_XOR_B32_e32_]](s16), %subreg.sub1
; GCN: $vgpr0_vgpr1 = COPY [[REG_SEQUENCE]](s64)
; GCN: S_ENDPGM 0, implicit [[REG_SEQUENCE]](s64)
%0:sgpr(s64) = COPY $sgpr0_sgpr1
%1:vgpr(s64) = G_FABS %0
%2:vgpr(s64) = G_FNEG %1
$vgpr0_vgpr1 = COPY %2
S_ENDPGM 0, implicit %2
...

View File

@ -56,7 +56,26 @@ def SUBSOME_INSN : I<(outs SRegs:$dst), (ins SOP:$src), []>;
// CHECK-NEXT: GIR_AddTempRegister, /*InsnID*/0, /*TempRegID*/1, /*TempRegFlags*/0,
// CHECK-NEXT: GIR_AddImm, /*InsnID*/0, /*SubRegIndex*/2,
// CHECK-NEXT: GIR_EraseFromParent, /*InsnID*/0,
// CHECK-NEXT: GIR_ConstrainSelectedInstOperands, /*InsnID*/0,
// CHECK-NEXT: GIR_ConstrainOperandRC, /*InsnID*/0, /*Op*/0, /*RC DRegs*/1,
// CHECK-NEXT: GIR_ConstrainOperandRC, /*InsnID*/0, /*Op*/1, /*RC SRegs*/0,
// CHECK-NEXT: GIR_ConstrainOperandRC, /*InsnID*/0, /*Op*/3, /*RC SRegs*/0,
def : Pat<(i32 (sext SOP:$src)),
(REG_SEQUENCE DRegs, (SUBSOME_INSN SOP:$src), sub0,
(SUBSOME_INSN SOP:$src), sub1)>;
// CHECK: GIM_CheckOpcode, /*MI*/0, TargetOpcode::G_ZEXT,
// CHECK: GIR_BuildMI, /*InsnID*/1, /*Opcode*/TargetOpcode::REG_SEQUENCE,
// CHECK-NEXT: GIR_AddTempRegister, /*InsnID*/1, /*TempRegID*/0, /*TempRegFlags*/RegState::Define,
// CHECK-NEXT: GIR_AddTempRegister, /*InsnID*/1, /*TempRegID*/1, /*TempRegFlags*/0,
// CHECK-NEXT: GIR_AddImm, /*InsnID*/1, /*SubRegIndex*/1,
// CHECK-NEXT: GIR_AddTempRegister, /*InsnID*/1, /*TempRegID*/2, /*TempRegFlags*/0,
// CHECK-NEXT: GIR_AddImm, /*InsnID*/1, /*SubRegIndex*/2,
// CHECK-NEXT: GIR_ConstrainOperandRC, /*InsnID*/1, /*Op*/0, /*RC DRegs*/1,
// CHECK-NEXT: GIR_ConstrainOperandRC, /*InsnID*/1, /*Op*/1, /*RC SRegs*/0,
// CHECK-NEXT: GIR_ConstrainOperandRC, /*InsnID*/1, /*Op*/3, /*RC SRegs*/0,
// CHECK-NEXT: GIR_BuildMI, /*InsnID*/0, /*Opcode*/MyTarget::SOME_INSN,
// Make sure operands are constrained when REG_SEQUENCE isn't the root instruction.
def : Pat<(i32 (zext SOP:$src)),
(SOME_INSN (REG_SEQUENCE DRegs, (SUBSOME_INSN SOP:$src), sub0,
(SUBSOME_INSN SOP:$src), sub1))>;

View File

@ -4281,6 +4281,29 @@ GlobalISelEmitter::createAndImportSubInstructionRenderer(
return InsertPtOrError.get();
}
if (OpName == "REG_SEQUENCE") {
auto SuperClass = inferRegClassFromPattern(Dst->getChild(0));
M.insertAction<ConstrainOperandToRegClassAction>(
InsertPt, DstMIBuilder.getInsnID(), 0, **SuperClass);
unsigned Num = Dst->getNumChildren();
for (unsigned I = 1; I != Num; I += 2) {
TreePatternNode *SubRegChild = Dst->getChild(I + 1);
auto SubIdx = inferSubRegIndexForNode(SubRegChild);
if (!SubIdx)
return failedImport("REG_SEQUENCE child is not a subreg index");
const auto SrcRCDstRCPair =
(*SuperClass)->getMatchingSubClassWithSubRegs(CGRegs, *SubIdx);
assert(SrcRCDstRCPair->second && "Couldn't find a matching subclass");
M.insertAction<ConstrainOperandToRegClassAction>(
InsertPt, DstMIBuilder.getInsnID(), I, *SrcRCDstRCPair->second);
}
return InsertPtOrError.get();
}
M.insertAction<ConstrainOperandsToDefinitionAction>(InsertPt,
DstMIBuilder.getInsnID());
return InsertPtOrError.get();
@ -4934,6 +4957,30 @@ Expected<RuleMatcher> GlobalISelEmitter::runOnPattern(const PatternToMatch &P) {
return std::move(M);
}
if (DstIName == "REG_SEQUENCE") {
auto SuperClass = inferRegClassFromPattern(Dst->getChild(0));
M.addAction<ConstrainOperandToRegClassAction>(0, 0, **SuperClass);
unsigned Num = Dst->getNumChildren();
for (unsigned I = 1; I != Num; I += 2) {
TreePatternNode *SubRegChild = Dst->getChild(I + 1);
auto SubIdx = inferSubRegIndexForNode(SubRegChild);
if (!SubIdx)
return failedImport("REG_SEQUENCE child is not a subreg index");
const auto SrcRCDstRCPair =
(*SuperClass)->getMatchingSubClassWithSubRegs(CGRegs, *SubIdx);
M.addAction<ConstrainOperandToRegClassAction>(0, I,
*SrcRCDstRCPair->second);
}
++NumPatternImported;
return std::move(M);
}
M.addAction<ConstrainOperandsToDefinitionAction>(0);
// We're done with this pattern! It's eligible for GISel emission; return it.