From 9e0eeba56928ea4f06f255eb36a4b3686ceb4a7b Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Wed, 10 Apr 2019 17:07:56 +0000 Subject: [PATCH] GlobalISel: Handle odd breakdowns for bit ops llvm-svn: 358105 --- .../llvm/CodeGen/GlobalISel/LegalizerHelper.h | 1 + .../CodeGen/GlobalISel/LegalizerHelper.cpp | 82 ++++++++++--------- .../AMDGPU/GlobalISel/legalize-and.mir | 46 +++++++++++ .../CodeGen/AMDGPU/GlobalISel/legalize-or.mir | 46 +++++++++++ .../AMDGPU/GlobalISel/legalize-xor.mir | 46 +++++++++++ 5 files changed, 181 insertions(+), 40 deletions(-) diff --git a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h index 392670d4262b..8834c468b3b6 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h @@ -210,6 +210,7 @@ private: LegalizeResult narrowScalarExtract(MachineInstr &MI, unsigned TypeIdx, LLT Ty); LegalizeResult narrowScalarInsert(MachineInstr &MI, unsigned TypeIdx, LLT Ty); + LegalizeResult narrowScalarBasic(MachineInstr &MI, unsigned TypeIdx, LLT Ty); LegalizeResult narrowScalarSelect(MachineInstr &MI, unsigned TypeIdx, LLT Ty); LegalizeResult lowerBitCount(MachineInstr &MI, unsigned TypeIdx, LLT Ty); diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp index 46b56d332af1..9fe0575feca8 100644 --- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -654,46 +654,7 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI, // ... // AN = BinOp BN, CN // A = G_MERGE_VALUES A1, ..., AN - - // FIXME: add support for when SizeOp0 isn't an exact multiple of - // NarrowSize. - if (SizeOp0 % NarrowSize != 0) - return UnableToLegalize; - int NumParts = SizeOp0 / NarrowSize; - - // List the registers where the destination will be scattered. 
- SmallVector<unsigned, 2> DstRegs; - // List the registers where the first argument will be split. - SmallVector<unsigned, 2> SrcsReg1; - // List the registers where the second argument will be split. - SmallVector<unsigned, 2> SrcsReg2; - // Create all the temporary registers. - for (int i = 0; i < NumParts; ++i) { - unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy); - unsigned SrcReg1 = MRI.createGenericVirtualRegister(NarrowTy); - unsigned SrcReg2 = MRI.createGenericVirtualRegister(NarrowTy); - - DstRegs.push_back(DstReg); - SrcsReg1.push_back(SrcReg1); - SrcsReg2.push_back(SrcReg2); - } - // Explode the big arguments into smaller chunks. - MIRBuilder.buildUnmerge(SrcsReg1, MI.getOperand(1).getReg()); - MIRBuilder.buildUnmerge(SrcsReg2, MI.getOperand(2).getReg()); - - // Do the operation on each small part. - for (int i = 0; i < NumParts; ++i) - MIRBuilder.buildInstr(MI.getOpcode(), {DstRegs[i]}, - {SrcsReg1[i], SrcsReg2[i]}); - - // Gather the destination registers into the final destination. - unsigned DstReg = MI.getOperand(0).getReg(); - if(MRI.getType(DstReg).isVector()) - MIRBuilder.buildBuildVector(DstReg, DstRegs); - else - MIRBuilder.buildMerge(DstReg, DstRegs); - MI.eraseFromParent(); - return Legalized; + return narrowScalarBasic(MI, TypeIdx, NarrowTy); } case TargetOpcode::G_SHL: case TargetOpcode::G_LSHR: @@ -2788,6 +2749,47 @@ LegalizerHelper::narrowScalarInsert(MachineInstr &MI, unsigned TypeIdx, return Legalized; } +LegalizerHelper::LegalizeResult +LegalizerHelper::narrowScalarBasic(MachineInstr &MI, unsigned TypeIdx, + LLT NarrowTy) { + unsigned DstReg = MI.getOperand(0).getReg(); + LLT DstTy = MRI.getType(DstReg); + + assert(MI.getNumOperands() == 3 && TypeIdx == 0); + + SmallVector<unsigned, 4> DstRegs, DstLeftoverRegs; + SmallVector<unsigned, 4> Src0Regs, Src0LeftoverRegs; + SmallVector<unsigned, 4> Src1Regs, Src1LeftoverRegs; + LLT LeftoverTy; + if (!extractParts(MI.getOperand(1).getReg(), DstTy, NarrowTy, LeftoverTy, + Src0Regs, Src0LeftoverRegs)) + return UnableToLegalize; + + LLT Unused; + if 
(!extractParts(MI.getOperand(2).getReg(), DstTy, NarrowTy, Unused, + Src1Regs, Src1LeftoverRegs)) + llvm_unreachable("inconsistent extractParts result"); + + for (unsigned I = 0, E = Src1Regs.size(); I != E; ++I) { + auto Inst = MIRBuilder.buildInstr(MI.getOpcode(), {NarrowTy}, + {Src0Regs[I], Src1Regs[I]}); + DstRegs.push_back(Inst->getOperand(0).getReg()); + } + + for (unsigned I = 0, E = Src1LeftoverRegs.size(); I != E; ++I) { + auto Inst = MIRBuilder.buildInstr( + MI.getOpcode(), + {LeftoverTy}, {Src0LeftoverRegs[I], Src1LeftoverRegs[I]}); + DstLeftoverRegs.push_back(Inst->getOperand(0).getReg()); + } + + insertParts(DstReg, DstTy, NarrowTy, DstRegs, + LeftoverTy, DstLeftoverRegs); + + MI.eraseFromParent(); + return Legalized; +} + LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalarSelect(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy) { diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-and.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-and.mir index 232dbf4a0f83..e95e063212dd 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-and.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-and.mir @@ -55,6 +55,52 @@ body: | $vgpr0_vgpr1 = COPY %2 ... 
+--- +name: test_and_s96 +body: | + bb.0: + liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5 + + ; CHECK-LABEL: name: test_and_s96 + ; CHECK: [[COPY:%[0-9]+]]:_(s96) = COPY $vgpr0_vgpr1_vgpr2 + ; CHECK: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr3_vgpr4_vgpr5 + ; CHECK: [[EXTRACT:%[0-9]+]]:_(s64) = G_EXTRACT [[COPY]](s96), 0 + ; CHECK: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY]](s96), 64 + ; CHECK: [[EXTRACT2:%[0-9]+]]:_(s64) = G_EXTRACT [[COPY1]](s96), 0 + ; CHECK: [[EXTRACT3:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](s96), 64 + ; CHECK: [[AND:%[0-9]+]]:_(s64) = G_AND [[EXTRACT]], [[EXTRACT2]] + ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[EXTRACT1]], [[EXTRACT3]] + ; CHECK: [[DEF:%[0-9]+]]:_(s96) = G_IMPLICIT_DEF + ; CHECK: [[INSERT:%[0-9]+]]:_(s96) = G_INSERT [[DEF]], [[AND]](s64), 0 + ; CHECK: [[INSERT1:%[0-9]+]]:_(s96) = G_INSERT [[INSERT]], [[AND1]](s32), 64 + ; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[INSERT1]](s96) + %0:_(s96) = COPY $vgpr0_vgpr1_vgpr2 + %1:_(s96) = COPY $vgpr3_vgpr4_vgpr5 + %2:_(s96) = G_AND %0, %1 + $vgpr0_vgpr1_vgpr2 = COPY %2 +... + +--- +name: test_and_128 +body: | + bb.0: + liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7 + + ; CHECK-LABEL: name: test_and_128 + ; CHECK: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK: [[COPY1:%[0-9]+]]:_(s128) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; CHECK: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](s128) + ; CHECK: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](s128) + ; CHECK: [[AND:%[0-9]+]]:_(s64) = G_AND [[UV]], [[UV2]] + ; CHECK: [[AND1:%[0-9]+]]:_(s64) = G_AND [[UV1]], [[UV3]] + ; CHECK: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[AND]](s64), [[AND1]](s64) + ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128) + %0:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(s128) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + %2:_(s128) = G_AND %0, %1 + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2 +... 
+ --- name: test_and_s7 body: | diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-or.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-or.mir index 9da7bad8e7bb..88a15298c36f 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-or.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-or.mir @@ -55,6 +55,52 @@ body: | $vgpr0_vgpr1 = COPY %2 ... +--- +name: test_or_s96 +body: | + bb.0: + liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5 + + ; CHECK-LABEL: name: test_or_s96 + ; CHECK: [[COPY:%[0-9]+]]:_(s96) = COPY $vgpr0_vgpr1_vgpr2 + ; CHECK: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr3_vgpr4_vgpr5 + ; CHECK: [[EXTRACT:%[0-9]+]]:_(s64) = G_EXTRACT [[COPY]](s96), 0 + ; CHECK: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY]](s96), 64 + ; CHECK: [[EXTRACT2:%[0-9]+]]:_(s64) = G_EXTRACT [[COPY1]](s96), 0 + ; CHECK: [[EXTRACT3:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](s96), 64 + ; CHECK: [[OR:%[0-9]+]]:_(s64) = G_OR [[EXTRACT]], [[EXTRACT2]] + ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[EXTRACT1]], [[EXTRACT3]] + ; CHECK: [[DEF:%[0-9]+]]:_(s96) = G_IMPLICIT_DEF + ; CHECK: [[INSERT:%[0-9]+]]:_(s96) = G_INSERT [[DEF]], [[OR]](s64), 0 + ; CHECK: [[INSERT1:%[0-9]+]]:_(s96) = G_INSERT [[INSERT]], [[OR1]](s32), 64 + ; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[INSERT1]](s96) + %0:_(s96) = COPY $vgpr0_vgpr1_vgpr2 + %1:_(s96) = COPY $vgpr3_vgpr4_vgpr5 + %2:_(s96) = G_OR %0, %1 + $vgpr0_vgpr1_vgpr2 = COPY %2 +... 
+ +--- +name: test_or_128 +body: | + bb.0: + liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7 + + ; CHECK-LABEL: name: test_or_128 + ; CHECK: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK: [[COPY1:%[0-9]+]]:_(s128) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; CHECK: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](s128) + ; CHECK: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](s128) + ; CHECK: [[OR:%[0-9]+]]:_(s64) = G_OR [[UV]], [[UV2]] + ; CHECK: [[OR1:%[0-9]+]]:_(s64) = G_OR [[UV1]], [[UV3]] + ; CHECK: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[OR]](s64), [[OR1]](s64) + ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128) + %0:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(s128) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + %2:_(s128) = G_OR %0, %1 + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2 +... + --- name: test_or_s7 body: | diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-xor.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-xor.mir index 4d50e08c940f..fae9ae8c9f56 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-xor.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-xor.mir @@ -55,6 +55,52 @@ body: | $vgpr0_vgpr1 = COPY %2 ... 
+--- +name: test_xor_s96 +body: | + bb.0: + liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5 + + ; CHECK-LABEL: name: test_xor_s96 + ; CHECK: [[COPY:%[0-9]+]]:_(s96) = COPY $vgpr0_vgpr1_vgpr2 + ; CHECK: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr3_vgpr4_vgpr5 + ; CHECK: [[EXTRACT:%[0-9]+]]:_(s64) = G_EXTRACT [[COPY]](s96), 0 + ; CHECK: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY]](s96), 64 + ; CHECK: [[EXTRACT2:%[0-9]+]]:_(s64) = G_EXTRACT [[COPY1]](s96), 0 + ; CHECK: [[EXTRACT3:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](s96), 64 + ; CHECK: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[EXTRACT]], [[EXTRACT2]] + ; CHECK: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[EXTRACT1]], [[EXTRACT3]] + ; CHECK: [[DEF:%[0-9]+]]:_(s96) = G_IMPLICIT_DEF + ; CHECK: [[INSERT:%[0-9]+]]:_(s96) = G_INSERT [[DEF]], [[XOR]](s64), 0 + ; CHECK: [[INSERT1:%[0-9]+]]:_(s96) = G_INSERT [[INSERT]], [[XOR1]](s32), 64 + ; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[INSERT1]](s96) + %0:_(s96) = COPY $vgpr0_vgpr1_vgpr2 + %1:_(s96) = COPY $vgpr3_vgpr4_vgpr5 + %2:_(s96) = G_XOR %0, %1 + $vgpr0_vgpr1_vgpr2 = COPY %2 +... + +--- +name: test_xor_128 +body: | + bb.0: + liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7 + + ; CHECK-LABEL: name: test_xor_128 + ; CHECK: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK: [[COPY1:%[0-9]+]]:_(s128) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; CHECK: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](s128) + ; CHECK: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](s128) + ; CHECK: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[UV]], [[UV2]] + ; CHECK: [[XOR1:%[0-9]+]]:_(s64) = G_XOR [[UV1]], [[UV3]] + ; CHECK: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[XOR]](s64), [[XOR1]](s64) + ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128) + %0:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(s128) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + %2:_(s128) = G_XOR %0, %1 + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2 +... + --- name: test_xor_s7 body: |