GlobalISel: Handle odd breakdowns for bit ops

llvm-svn: 358105
This commit is contained in:
Matt Arsenault 2019-04-10 17:07:56 +00:00
parent 8b36ac818c
commit 9e0eeba569
5 changed files with 181 additions and 40 deletions

View File

@ -210,6 +210,7 @@ private:
// Opcode-specific legalization helpers. Each one takes the instruction being
// legalized, a type index, and an LLT, and reports the outcome as a
// LegalizeResult.
LegalizeResult narrowScalarExtract(MachineInstr &MI, unsigned TypeIdx, LLT Ty);
LegalizeResult narrowScalarInsert(MachineInstr &MI, unsigned TypeIdx, LLT Ty);
// Narrows a two-source operation by splitting both sources into pieces of
// type Ty (plus leftover pieces when the split is uneven), applying the
// opcode to each pair of pieces, and reassembling the destination.
LegalizeResult narrowScalarBasic(MachineInstr &MI, unsigned TypeIdx, LLT Ty);
LegalizeResult narrowScalarSelect(MachineInstr &MI, unsigned TypeIdx, LLT Ty);
LegalizeResult lowerBitCount(MachineInstr &MI, unsigned TypeIdx, LLT Ty);

View File

@ -654,46 +654,7 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
// ...
// AN = BinOp<Ty/N> BN, CN
// A = G_MERGE_VALUES A1, ..., AN
// FIXME: add support for when SizeOp0 isn't an exact multiple of
// NarrowSize.
if (SizeOp0 % NarrowSize != 0)
return UnableToLegalize;
int NumParts = SizeOp0 / NarrowSize;
// List the registers where the destination will be scattered.
SmallVector<unsigned, 2> DstRegs;
// List the registers where the first argument will be split.
SmallVector<unsigned, 2> SrcsReg1;
// List the registers where the second argument will be split.
SmallVector<unsigned, 2> SrcsReg2;
// Create all the temporary registers.
for (int i = 0; i < NumParts; ++i) {
unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy);
unsigned SrcReg1 = MRI.createGenericVirtualRegister(NarrowTy);
unsigned SrcReg2 = MRI.createGenericVirtualRegister(NarrowTy);
DstRegs.push_back(DstReg);
SrcsReg1.push_back(SrcReg1);
SrcsReg2.push_back(SrcReg2);
}
// Explode the big arguments into smaller chunks.
MIRBuilder.buildUnmerge(SrcsReg1, MI.getOperand(1).getReg());
MIRBuilder.buildUnmerge(SrcsReg2, MI.getOperand(2).getReg());
// Do the operation on each small part.
for (int i = 0; i < NumParts; ++i)
MIRBuilder.buildInstr(MI.getOpcode(), {DstRegs[i]},
{SrcsReg1[i], SrcsReg2[i]});
// Gather the destination registers into the final destination.
unsigned DstReg = MI.getOperand(0).getReg();
if(MRI.getType(DstReg).isVector())
MIRBuilder.buildBuildVector(DstReg, DstRegs);
else
MIRBuilder.buildMerge(DstReg, DstRegs);
MI.eraseFromParent();
return Legalized;
return narrowScalarBasic(MI, TypeIdx, NarrowTy);
}
case TargetOpcode::G_SHL:
case TargetOpcode::G_LSHR:
@ -2788,6 +2749,47 @@ LegalizerHelper::narrowScalarInsert(MachineInstr &MI, unsigned TypeIdx,
return Legalized;
}
/// Narrow a two-source operation (operands: dst, src0, src1) by performing it
/// piecewise on \p NarrowTy sized parts.
///
/// Both sources are decomposed with extractParts() into NarrowTy pieces plus,
/// when the breakdown is uneven, leftover pieces of a second type. The
/// original opcode is re-emitted for every pair of pieces, insertParts()
/// reassembles the results into the original destination register, and \p MI
/// is erased.
///
/// \param MI       Instruction to narrow; must have exactly three operands.
/// \param TypeIdx  Type index being legalized; only index 0 is supported.
/// \param NarrowTy Type of the pieces the operation is split into.
/// \returns Legalized on success; UnableToLegalize when \p TypeIdx is not 0
///          or when extractParts() cannot break down the first source.
LegalizerHelper::LegalizeResult
LegalizerHelper::narrowScalarBasic(MachineInstr &MI, unsigned TypeIdx,
                                   LLT NarrowTy) {
  // Reject unsupported type indices explicitly. Relying on the assert alone
  // would let release (NDEBUG) builds narrow the wrong operand silently.
  if (TypeIdx != 0)
    return UnableToLegalize;

  assert(MI.getNumOperands() == 3 && "expected dst, src0, src1 operands");

  unsigned DstReg = MI.getOperand(0).getReg();
  LLT DstTy = MRI.getType(DstReg);

  SmallVector<unsigned, 4> DstRegs, DstLeftoverRegs;
  SmallVector<unsigned, 4> Src0Regs, Src0LeftoverRegs;
  SmallVector<unsigned, 4> Src1Regs, Src1LeftoverRegs;

  // The first source decides whether the breakdown is possible at all and
  // fixes the type used for the leftover pieces.
  LLT LeftoverTy;
  if (!extractParts(MI.getOperand(1).getReg(), DstTy, NarrowTy, LeftoverTy,
                    Src0Regs, Src0LeftoverRegs))
    return UnableToLegalize;

  // The second source is split with the same DstTy/NarrowTy arguments, so a
  // failure here would contradict the successful split above.
  LLT Unused;
  if (!extractParts(MI.getOperand(2).getReg(), DstTy, NarrowTy, Unused,
                    Src1Regs, Src1LeftoverRegs))
    llvm_unreachable("inconsistent extractParts result");

  // Apply the operation to each pair of NarrowTy pieces.
  for (unsigned I = 0, E = Src1Regs.size(); I != E; ++I) {
    auto Inst = MIRBuilder.buildInstr(MI.getOpcode(), {NarrowTy},
                                      {Src0Regs[I], Src1Regs[I]});
    DstRegs.push_back(Inst->getOperand(0).getReg());
  }

  // Apply the operation to each pair of leftover pieces, if there are any.
  for (unsigned I = 0, E = Src1LeftoverRegs.size(); I != E; ++I) {
    auto Inst = MIRBuilder.buildInstr(
        MI.getOpcode(), {LeftoverTy},
        {Src0LeftoverRegs[I], Src1LeftoverRegs[I]});
    DstLeftoverRegs.push_back(Inst->getOperand(0).getReg());
  }

  // Reassemble the piecewise results into the original destination.
  insertParts(DstReg, DstTy, NarrowTy, DstRegs,
              LeftoverTy, DstLeftoverRegs);

  MI.eraseFromParent();
  return Legalized;
}
LegalizerHelper::LegalizeResult
LegalizerHelper::narrowScalarSelect(MachineInstr &MI, unsigned TypeIdx,
LLT NarrowTy) {

View File

@ -55,6 +55,52 @@ body: |
$vgpr0_vgpr1 = COPY %2
...
---
# G_AND on s96, an uneven breakdown: the CHECK lines expect each operand to be
# split with G_EXTRACT into an s64 piece at bit 0 and an s32 leftover at bit
# 64, one G_AND per piece, and the s96 result rebuilt with G_INSERT on top of
# a G_IMPLICIT_DEF.
name: test_and_s96
body: |
bb.0:
liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5
; CHECK-LABEL: name: test_and_s96
; CHECK: [[COPY:%[0-9]+]]:_(s96) = COPY $vgpr0_vgpr1_vgpr2
; CHECK: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr3_vgpr4_vgpr5
; CHECK: [[EXTRACT:%[0-9]+]]:_(s64) = G_EXTRACT [[COPY]](s96), 0
; CHECK: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY]](s96), 64
; CHECK: [[EXTRACT2:%[0-9]+]]:_(s64) = G_EXTRACT [[COPY1]](s96), 0
; CHECK: [[EXTRACT3:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](s96), 64
; CHECK: [[AND:%[0-9]+]]:_(s64) = G_AND [[EXTRACT]], [[EXTRACT2]]
; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[EXTRACT1]], [[EXTRACT3]]
; CHECK: [[DEF:%[0-9]+]]:_(s96) = G_IMPLICIT_DEF
; CHECK: [[INSERT:%[0-9]+]]:_(s96) = G_INSERT [[DEF]], [[AND]](s64), 0
; CHECK: [[INSERT1:%[0-9]+]]:_(s96) = G_INSERT [[INSERT]], [[AND1]](s32), 64
; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[INSERT1]](s96)
%0:_(s96) = COPY $vgpr0_vgpr1_vgpr2
%1:_(s96) = COPY $vgpr3_vgpr4_vgpr5
%2:_(s96) = G_AND %0, %1
$vgpr0_vgpr1_vgpr2 = COPY %2
...
---
# G_AND on s128, an even breakdown: the CHECK lines expect each operand to be
# split into two s64 halves with G_UNMERGE_VALUES, one G_AND per half, and the
# s128 result rebuilt with G_MERGE_VALUES.
name: test_and_128
body: |
bb.0:
liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7
; CHECK-LABEL: name: test_and_128
; CHECK: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
; CHECK: [[COPY1:%[0-9]+]]:_(s128) = COPY $vgpr4_vgpr5_vgpr6_vgpr7
; CHECK: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](s128)
; CHECK: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](s128)
; CHECK: [[AND:%[0-9]+]]:_(s64) = G_AND [[UV]], [[UV2]]
; CHECK: [[AND1:%[0-9]+]]:_(s64) = G_AND [[UV1]], [[UV3]]
; CHECK: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[AND]](s64), [[AND1]](s64)
; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128)
%0:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
%1:_(s128) = COPY $vgpr4_vgpr5_vgpr6_vgpr7
%2:_(s128) = G_AND %0, %1
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2
...
---
name: test_and_s7
body: |

View File

@ -55,6 +55,52 @@ body: |
$vgpr0_vgpr1 = COPY %2
...
---
# G_OR on s96, an uneven breakdown: the CHECK lines expect each operand to be
# split with G_EXTRACT into an s64 piece at bit 0 and an s32 leftover at bit
# 64, one G_OR per piece, and the s96 result rebuilt with G_INSERT on top of
# a G_IMPLICIT_DEF.
name: test_or_s96
body: |
bb.0:
liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5
; CHECK-LABEL: name: test_or_s96
; CHECK: [[COPY:%[0-9]+]]:_(s96) = COPY $vgpr0_vgpr1_vgpr2
; CHECK: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr3_vgpr4_vgpr5
; CHECK: [[EXTRACT:%[0-9]+]]:_(s64) = G_EXTRACT [[COPY]](s96), 0
; CHECK: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY]](s96), 64
; CHECK: [[EXTRACT2:%[0-9]+]]:_(s64) = G_EXTRACT [[COPY1]](s96), 0
; CHECK: [[EXTRACT3:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](s96), 64
; CHECK: [[OR:%[0-9]+]]:_(s64) = G_OR [[EXTRACT]], [[EXTRACT2]]
; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[EXTRACT1]], [[EXTRACT3]]
; CHECK: [[DEF:%[0-9]+]]:_(s96) = G_IMPLICIT_DEF
; CHECK: [[INSERT:%[0-9]+]]:_(s96) = G_INSERT [[DEF]], [[OR]](s64), 0
; CHECK: [[INSERT1:%[0-9]+]]:_(s96) = G_INSERT [[INSERT]], [[OR1]](s32), 64
; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[INSERT1]](s96)
%0:_(s96) = COPY $vgpr0_vgpr1_vgpr2
%1:_(s96) = COPY $vgpr3_vgpr4_vgpr5
%2:_(s96) = G_OR %0, %1
$vgpr0_vgpr1_vgpr2 = COPY %2
...
---
# G_OR on s128, an even breakdown: the CHECK lines expect each operand to be
# split into two s64 halves with G_UNMERGE_VALUES, one G_OR per half, and the
# s128 result rebuilt with G_MERGE_VALUES.
name: test_or_128
body: |
bb.0:
liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7
; CHECK-LABEL: name: test_or_128
; CHECK: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
; CHECK: [[COPY1:%[0-9]+]]:_(s128) = COPY $vgpr4_vgpr5_vgpr6_vgpr7
; CHECK: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](s128)
; CHECK: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](s128)
; CHECK: [[OR:%[0-9]+]]:_(s64) = G_OR [[UV]], [[UV2]]
; CHECK: [[OR1:%[0-9]+]]:_(s64) = G_OR [[UV1]], [[UV3]]
; CHECK: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[OR]](s64), [[OR1]](s64)
; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128)
%0:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
%1:_(s128) = COPY $vgpr4_vgpr5_vgpr6_vgpr7
%2:_(s128) = G_OR %0, %1
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2
...
---
name: test_or_s7
body: |

View File

@ -55,6 +55,52 @@ body: |
$vgpr0_vgpr1 = COPY %2
...
---
# G_XOR on s96, an uneven breakdown: the CHECK lines expect each operand to be
# split with G_EXTRACT into an s64 piece at bit 0 and an s32 leftover at bit
# 64, one G_XOR per piece, and the s96 result rebuilt with G_INSERT on top of
# a G_IMPLICIT_DEF.
name: test_xor_s96
body: |
bb.0:
liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5
; CHECK-LABEL: name: test_xor_s96
; CHECK: [[COPY:%[0-9]+]]:_(s96) = COPY $vgpr0_vgpr1_vgpr2
; CHECK: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr3_vgpr4_vgpr5
; CHECK: [[EXTRACT:%[0-9]+]]:_(s64) = G_EXTRACT [[COPY]](s96), 0
; CHECK: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY]](s96), 64
; CHECK: [[EXTRACT2:%[0-9]+]]:_(s64) = G_EXTRACT [[COPY1]](s96), 0
; CHECK: [[EXTRACT3:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](s96), 64
; CHECK: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[EXTRACT]], [[EXTRACT2]]
; CHECK: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[EXTRACT1]], [[EXTRACT3]]
; CHECK: [[DEF:%[0-9]+]]:_(s96) = G_IMPLICIT_DEF
; CHECK: [[INSERT:%[0-9]+]]:_(s96) = G_INSERT [[DEF]], [[XOR]](s64), 0
; CHECK: [[INSERT1:%[0-9]+]]:_(s96) = G_INSERT [[INSERT]], [[XOR1]](s32), 64
; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[INSERT1]](s96)
%0:_(s96) = COPY $vgpr0_vgpr1_vgpr2
%1:_(s96) = COPY $vgpr3_vgpr4_vgpr5
%2:_(s96) = G_XOR %0, %1
$vgpr0_vgpr1_vgpr2 = COPY %2
...
---
# G_XOR on s128, an even breakdown: the CHECK lines expect each operand to be
# split into two s64 halves with G_UNMERGE_VALUES, one G_XOR per half, and the
# s128 result rebuilt with G_MERGE_VALUES.
name: test_xor_128
body: |
bb.0:
liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7
; CHECK-LABEL: name: test_xor_128
; CHECK: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
; CHECK: [[COPY1:%[0-9]+]]:_(s128) = COPY $vgpr4_vgpr5_vgpr6_vgpr7
; CHECK: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](s128)
; CHECK: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](s128)
; CHECK: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[UV]], [[UV2]]
; CHECK: [[XOR1:%[0-9]+]]:_(s64) = G_XOR [[UV1]], [[UV3]]
; CHECK: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[XOR]](s64), [[XOR1]](s64)
; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128)
%0:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
%1:_(s128) = COPY $vgpr4_vgpr5_vgpr6_vgpr7
%2:_(s128) = G_XOR %0, %1
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2
...
---
name: test_xor_s7
body: |