forked from OSchip/llvm-project
TableGen/GlobalISel: Fix constraining REG_SEQUENCE operands
This was hitting the default instruction constraint code which uses the register classes in the instruction def, which REG_SEQUENCE does not have. Fixes not constraining the register class for AMDGPU fneg/fabs patterns, which would fail when the use was another generic, unconstrained instruction. Another oddity I noticed is that the temporary registers are created with an unnecessary, but incorrect 16-bit LLT but this shouldn't matter. I'm also still unclear why root and sub-instructions have to be handled differently.
This commit is contained in:
parent
229e392b4e
commit
cb5dc3765b
|
@ -201,11 +201,11 @@ body: |
|
|||
; GCN-LABEL: name: fabs_s64_ss
|
||||
; GCN: liveins: $sgpr0_sgpr1
|
||||
; GCN: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1
|
||||
; GCN: [[FABS:%[0-9]+]]:sreg_64(s64) = G_FABS [[COPY]]
|
||||
; GCN: $sgpr0_sgpr1 = COPY [[FABS]](s64)
|
||||
; GCN: [[FABS:%[0-9]+]]:sgpr(s64) = G_FABS [[COPY]]
|
||||
; GCN: S_ENDPGM 0, implicit [[FABS]](s64)
|
||||
%0:sgpr(s64) = COPY $sgpr0_sgpr1
|
||||
%1:sgpr(s64) = G_FABS %0
|
||||
$sgpr0_sgpr1 = COPY %1
|
||||
S_ENDPGM 0, implicit %1
|
||||
...
|
||||
|
||||
---
|
||||
|
@ -225,10 +225,10 @@ body: |
|
|||
; GCN: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY1]], [[V_MOV_B32_e32_]], implicit $exec
|
||||
; GCN: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
|
||||
; GCN: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[V_AND_B32_e64_]], %subreg.sub1
|
||||
; GCN: $vgpr0_vgpr1 = COPY [[REG_SEQUENCE]]
|
||||
; GCN: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
|
||||
%0:vgpr(s64) = COPY $vgpr0_vgpr1
|
||||
%1:vgpr(s64) = G_FABS %0
|
||||
$vgpr0_vgpr1 = COPY %1
|
||||
S_ENDPGM 0, implicit %1
|
||||
...
|
||||
|
||||
---
|
||||
|
@ -243,9 +243,9 @@ body: |
|
|||
; GCN-LABEL: name: fabs_s64_vs
|
||||
; GCN: liveins: $sgpr0_sgpr1
|
||||
; GCN: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1
|
||||
; GCN: [[FABS:%[0-9]+]]:vreg_64(s64) = G_FABS [[COPY]]
|
||||
; GCN: $vgpr0_vgpr1 = COPY [[FABS]](s64)
|
||||
; GCN: [[FABS:%[0-9]+]]:vgpr(s64) = G_FABS [[COPY]]
|
||||
; GCN: S_ENDPGM 0, implicit [[FABS]](s64)
|
||||
%0:sgpr(s64) = COPY $sgpr0_sgpr1
|
||||
%1:vgpr(s64) = G_FABS %0
|
||||
$vgpr0_vgpr1 = COPY %1
|
||||
S_ENDPGM 0, implicit %1
|
||||
...
|
||||
|
|
|
@ -206,10 +206,10 @@ body: |
|
|||
; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648
|
||||
; GCN: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[COPY2]], [[S_MOV_B32_]], implicit-def $scc
|
||||
; GCN: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[S_XOR_B32_]], %subreg.sub1
|
||||
; GCN: $sgpr0_sgpr1 = COPY [[REG_SEQUENCE]]
|
||||
; GCN: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
|
||||
%0:sgpr(s64) = COPY $sgpr0_sgpr1
|
||||
%1:sgpr(s64) = G_FNEG %0
|
||||
$sgpr0_sgpr1 = COPY %1
|
||||
S_ENDPGM 0, implicit %1
|
||||
...
|
||||
|
||||
---
|
||||
|
@ -229,10 +229,10 @@ body: |
|
|||
; GCN: [[V_XOR_B32_e32_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e32 [[COPY1]], [[V_MOV_B32_e32_]], implicit $exec
|
||||
; GCN: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
|
||||
; GCN: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[V_XOR_B32_e32_]], %subreg.sub1
|
||||
; GCN: $vgpr0_vgpr1 = COPY [[REG_SEQUENCE]]
|
||||
; GCN: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
|
||||
%0:vgpr(s64) = COPY $vgpr0_vgpr1
|
||||
%1:vgpr(s64) = G_FNEG %0
|
||||
$vgpr0_vgpr1 = COPY %1
|
||||
S_ENDPGM 0, implicit %1
|
||||
...
|
||||
|
||||
---
|
||||
|
@ -247,11 +247,12 @@ body: |
|
|||
; GCN-LABEL: name: fneg_s64_vs
|
||||
; GCN: liveins: $sgpr0_sgpr1
|
||||
; GCN: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1
|
||||
; GCN: [[FNEG:%[0-9]+]]:vreg_64(s64) = G_FNEG [[COPY]]
|
||||
; GCN: $vgpr0_vgpr1 = COPY [[FNEG]](s64)
|
||||
; GCN: [[FNEG:%[0-9]+]]:vgpr(s64) = G_FNEG [[COPY]]
|
||||
; GCN: S_ENDPGM 0, implicit [[FNEG]](s64)
|
||||
%0:sgpr(s64) = COPY $sgpr0_sgpr1
|
||||
%1:vgpr(s64) = G_FNEG %0
|
||||
$vgpr0_vgpr1 = COPY %1
|
||||
S_ENDPGM 0, implicit %1
|
||||
|
||||
...
|
||||
|
||||
---
|
||||
|
@ -268,11 +269,11 @@ body: |
|
|||
; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
|
||||
; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648
|
||||
; GCN: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc
|
||||
; GCN: $sgpr0 = COPY [[S_OR_B32_]]
|
||||
; GCN: S_ENDPGM 0, implicit [[S_OR_B32_]]
|
||||
%0:sgpr(s32) = COPY $sgpr0
|
||||
%1:sgpr(s32) = G_FABS %0
|
||||
%2:sgpr(s32) = G_FNEG %1
|
||||
$sgpr0 = COPY %2
|
||||
S_ENDPGM 0, implicit %2
|
||||
...
|
||||
|
||||
---
|
||||
|
@ -289,11 +290,11 @@ body: |
|
|||
; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648
|
||||
; GCN: [[V_XOR_B32_e32_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e32 [[S_MOV_B32_]], [[COPY]], implicit $exec
|
||||
; GCN: $vgpr0 = COPY [[V_XOR_B32_e32_]]
|
||||
; GCN: S_ENDPGM 0, implicit [[V_XOR_B32_e32_]]
|
||||
%0:vgpr(s32) = COPY $vgpr0
|
||||
%1:vgpr(s32) = G_FABS %0
|
||||
%2:vgpr(s32) = G_FNEG %0
|
||||
$vgpr0 = COPY %2
|
||||
S_ENDPGM 0, implicit %2
|
||||
...
|
||||
|
||||
---
|
||||
|
@ -311,11 +312,11 @@ body: |
|
|||
; GCN: [[FABS:%[0-9]+]]:vgpr_32(s32) = G_FABS [[COPY]]
|
||||
; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32(s16) = S_MOV_B32 2147483648
|
||||
; GCN: [[V_XOR_B32_e32_:%[0-9]+]]:vgpr_32(s32) = V_XOR_B32_e32 [[S_MOV_B32_]](s16), [[FABS]](s32), implicit $exec
|
||||
; GCN: $vgpr0 = COPY [[V_XOR_B32_e32_]](s32)
|
||||
; GCN: S_ENDPGM 0, implicit [[V_XOR_B32_e32_]](s32)
|
||||
%0:sgpr(s32) = COPY $sgpr0
|
||||
%1:vgpr(s32) = G_FABS %0
|
||||
%2:vgpr(s32) = G_FNEG %1
|
||||
$vgpr0 = COPY %2
|
||||
S_ENDPGM 0, implicit %2
|
||||
...
|
||||
|
||||
---
|
||||
|
@ -472,11 +473,11 @@ body: |
|
|||
; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648
|
||||
; GCN: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[COPY2]], [[S_MOV_B32_]], implicit-def $scc
|
||||
; GCN: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[S_OR_B32_]], %subreg.sub1
|
||||
; GCN: $sgpr0_sgpr1 = COPY [[REG_SEQUENCE]]
|
||||
; GCN: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
|
||||
%0:sgpr(s64) = COPY $sgpr0_sgpr1
|
||||
%1:sgpr(s64) = G_FABS %0
|
||||
%2:sgpr(s64) = G_FNEG %1
|
||||
$sgpr0_sgpr1 = COPY %2
|
||||
S_ENDPGM 0, implicit %2
|
||||
...
|
||||
|
||||
---
|
||||
|
@ -496,11 +497,11 @@ body: |
|
|||
; GCN: [[V_OR_B32_e32_:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[COPY1]], [[V_MOV_B32_e32_]], implicit $exec
|
||||
; GCN: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
|
||||
; GCN: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[V_OR_B32_e32_]], %subreg.sub1
|
||||
; GCN: $vgpr0_vgpr1 = COPY [[REG_SEQUENCE]]
|
||||
; GCN: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
|
||||
%0:vgpr(s64) = COPY $vgpr0_vgpr1
|
||||
%1:vgpr(s64) = G_FABS %0
|
||||
%2:vgpr(s64) = G_FNEG %1
|
||||
$vgpr0_vgpr1 = COPY %2
|
||||
S_ENDPGM 0, implicit %2
|
||||
...
|
||||
|
||||
---
|
||||
|
@ -521,9 +522,9 @@ body: |
|
|||
; GCN: [[V_XOR_B32_e32_:%[0-9]+]]:vgpr_32(s16) = V_XOR_B32_e32 [[COPY1]](s32), [[V_MOV_B32_e32_]](s32), implicit $exec
|
||||
; GCN: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY [[FABS]].sub0(s64)
|
||||
; GCN: [[REG_SEQUENCE:%[0-9]+]]:vreg_64(s64) = REG_SEQUENCE [[COPY2]](s32), %subreg.sub0, [[V_XOR_B32_e32_]](s16), %subreg.sub1
|
||||
; GCN: $vgpr0_vgpr1 = COPY [[REG_SEQUENCE]](s64)
|
||||
; GCN: S_ENDPGM 0, implicit [[REG_SEQUENCE]](s64)
|
||||
%0:sgpr(s64) = COPY $sgpr0_sgpr1
|
||||
%1:vgpr(s64) = G_FABS %0
|
||||
%2:vgpr(s64) = G_FNEG %1
|
||||
$vgpr0_vgpr1 = COPY %2
|
||||
S_ENDPGM 0, implicit %2
|
||||
...
|
||||
|
|
|
@ -56,7 +56,26 @@ def SUBSOME_INSN : I<(outs SRegs:$dst), (ins SOP:$src), []>;
|
|||
// CHECK-NEXT: GIR_AddTempRegister, /*InsnID*/0, /*TempRegID*/1, /*TempRegFlags*/0,
|
||||
// CHECK-NEXT: GIR_AddImm, /*InsnID*/0, /*SubRegIndex*/2,
|
||||
// CHECK-NEXT: GIR_EraseFromParent, /*InsnID*/0,
|
||||
// CHECK-NEXT: GIR_ConstrainSelectedInstOperands, /*InsnID*/0,
|
||||
// CHECK-NEXT: GIR_ConstrainOperandRC, /*InsnID*/0, /*Op*/0, /*RC DRegs*/1,
|
||||
// CHECK-NEXT: GIR_ConstrainOperandRC, /*InsnID*/0, /*Op*/1, /*RC SRegs*/0,
|
||||
// CHECK-NEXT: GIR_ConstrainOperandRC, /*InsnID*/0, /*Op*/3, /*RC SRegs*/0,
|
||||
def : Pat<(i32 (sext SOP:$src)),
|
||||
(REG_SEQUENCE DRegs, (SUBSOME_INSN SOP:$src), sub0,
|
||||
(SUBSOME_INSN SOP:$src), sub1)>;
|
||||
|
||||
|
||||
// CHECK: GIM_CheckOpcode, /*MI*/0, TargetOpcode::G_ZEXT,
|
||||
// CHECK: GIR_BuildMI, /*InsnID*/1, /*Opcode*/TargetOpcode::REG_SEQUENCE,
|
||||
// CHECK-NEXT: GIR_AddTempRegister, /*InsnID*/1, /*TempRegID*/0, /*TempRegFlags*/RegState::Define,
|
||||
// CHECK-NEXT: GIR_AddTempRegister, /*InsnID*/1, /*TempRegID*/1, /*TempRegFlags*/0,
|
||||
// CHECK-NEXT: GIR_AddImm, /*InsnID*/1, /*SubRegIndex*/1,
|
||||
// CHECK-NEXT: GIR_AddTempRegister, /*InsnID*/1, /*TempRegID*/2, /*TempRegFlags*/0,
|
||||
// CHECK-NEXT: GIR_AddImm, /*InsnID*/1, /*SubRegIndex*/2,
|
||||
// CHECK-NEXT: GIR_ConstrainOperandRC, /*InsnID*/1, /*Op*/0, /*RC DRegs*/1,
|
||||
// CHECK-NEXT: GIR_ConstrainOperandRC, /*InsnID*/1, /*Op*/1, /*RC SRegs*/0,
|
||||
// CHECK-NEXT: GIR_ConstrainOperandRC, /*InsnID*/1, /*Op*/3, /*RC SRegs*/0,
|
||||
// CHECK-NEXT: GIR_BuildMI, /*InsnID*/0, /*Opcode*/MyTarget::SOME_INSN,
|
||||
// Make sure operands are constrained when REG_SEQUENCE isn't the root instruction.
|
||||
def : Pat<(i32 (zext SOP:$src)),
|
||||
(SOME_INSN (REG_SEQUENCE DRegs, (SUBSOME_INSN SOP:$src), sub0,
|
||||
(SUBSOME_INSN SOP:$src), sub1))>;
|
||||
|
|
|
@ -4281,6 +4281,29 @@ GlobalISelEmitter::createAndImportSubInstructionRenderer(
|
|||
return InsertPtOrError.get();
|
||||
}
|
||||
|
||||
if (OpName == "REG_SEQUENCE") {
|
||||
auto SuperClass = inferRegClassFromPattern(Dst->getChild(0));
|
||||
M.insertAction<ConstrainOperandToRegClassAction>(
|
||||
InsertPt, DstMIBuilder.getInsnID(), 0, **SuperClass);
|
||||
|
||||
unsigned Num = Dst->getNumChildren();
|
||||
for (unsigned I = 1; I != Num; I += 2) {
|
||||
TreePatternNode *SubRegChild = Dst->getChild(I + 1);
|
||||
|
||||
auto SubIdx = inferSubRegIndexForNode(SubRegChild);
|
||||
if (!SubIdx)
|
||||
return failedImport("REG_SEQUENCE child is not a subreg index");
|
||||
|
||||
const auto SrcRCDstRCPair =
|
||||
(*SuperClass)->getMatchingSubClassWithSubRegs(CGRegs, *SubIdx);
|
||||
assert(SrcRCDstRCPair->second && "Couldn't find a matching subclass");
|
||||
M.insertAction<ConstrainOperandToRegClassAction>(
|
||||
InsertPt, DstMIBuilder.getInsnID(), I, *SrcRCDstRCPair->second);
|
||||
}
|
||||
|
||||
return InsertPtOrError.get();
|
||||
}
|
||||
|
||||
M.insertAction<ConstrainOperandsToDefinitionAction>(InsertPt,
|
||||
DstMIBuilder.getInsnID());
|
||||
return InsertPtOrError.get();
|
||||
|
@ -4934,6 +4957,30 @@ Expected<RuleMatcher> GlobalISelEmitter::runOnPattern(const PatternToMatch &P) {
|
|||
return std::move(M);
|
||||
}
|
||||
|
||||
if (DstIName == "REG_SEQUENCE") {
|
||||
auto SuperClass = inferRegClassFromPattern(Dst->getChild(0));
|
||||
|
||||
M.addAction<ConstrainOperandToRegClassAction>(0, 0, **SuperClass);
|
||||
|
||||
unsigned Num = Dst->getNumChildren();
|
||||
for (unsigned I = 1; I != Num; I += 2) {
|
||||
TreePatternNode *SubRegChild = Dst->getChild(I + 1);
|
||||
|
||||
auto SubIdx = inferSubRegIndexForNode(SubRegChild);
|
||||
if (!SubIdx)
|
||||
return failedImport("REG_SEQUENCE child is not a subreg index");
|
||||
|
||||
const auto SrcRCDstRCPair =
|
||||
(*SuperClass)->getMatchingSubClassWithSubRegs(CGRegs, *SubIdx);
|
||||
|
||||
M.addAction<ConstrainOperandToRegClassAction>(0, I,
|
||||
*SrcRCDstRCPair->second);
|
||||
}
|
||||
|
||||
++NumPatternImported;
|
||||
return std::move(M);
|
||||
}
|
||||
|
||||
M.addAction<ConstrainOperandsToDefinitionAction>(0);
|
||||
|
||||
// We're done with this pattern! It's eligible for GISel emission; return it.
|
||||
|
|
Loading…
Reference in New Issue