diff --git a/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h b/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h
index 24ab286b51f3..aa08d4530cb1 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h
@@ -293,6 +293,7 @@ public:
   /// Idxs[0] + N)` of \p Src and similarly for subsequent bit-indexes.
   ///
   /// \pre setBasicBlock or setMI must have been called.
+  /// \pre \p Indices must be in ascending order of bit position.
   ///
   /// \return a MachineInstrBuilder for the newly created instruction.
   MachineInstrBuilder buildExtract(ArrayRef ResTys,
@@ -311,7 +312,7 @@ public:
   /// destination register.
   /// \pre The bits defined by each Op (derived from index and scalar size) must
   /// not overlap.
-  /// \pre Each source operand must have a
+  /// \pre \p Indices must be in ascending order of bit position.
   ///
   /// \return a MachineInstrBuilder for the newly created instruction.
   MachineInstrBuilder buildSequence(LLT ResTy, unsigned Res,
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/MachineLegalizePass.h b/llvm/include/llvm/CodeGen/GlobalISel/MachineLegalizePass.h
index ccdc05a96237..eca06f4612b8 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/MachineLegalizePass.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/MachineLegalizePass.h
@@ -26,6 +26,8 @@
 
 namespace llvm {
 
+class MachineRegisterInfo;
+
 class MachineLegalizePass : public MachineFunctionPass {
 public:
   static char ID;
@@ -55,6 +57,16 @@ public:
         MachineFunctionProperties::Property::Legalized);
   }
 
+  /// Try to fold the G_EXTRACT \p MI into the G_SEQUENCE that defines its
+  /// source. Each extracted value whose bit offset and type exactly match one
+  /// of the G_SEQUENCE's inputs has its uses rewritten to that input. \p MI is
+  /// erased once every one of its results has been matched, and the
+  /// G_SEQUENCE is erased too if that leaves it without uses.
+  ///
+  /// \return true if any operand was rewritten or any instruction erased.
+  bool combineExtracts(MachineInstr &MI, MachineRegisterInfo &MRI,
+                       const TargetInstrInfo &TII);
+
   bool runOnMachineFunction(MachineFunction &MF) override;
 };
 } // End namespace llvm.
diff --git a/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp b/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
index 76555f99c580..df48c428cb11 100644
--- a/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
@@ -193,6 +193,8 @@ MachineInstrBuilder MachineIRBuilder::buildExtract(ArrayRef ResTys,
   assert(ResTys.size() == Results.size() && Results.size() == Indices.size() &&
          "inconsistent number of regs");
   assert(!Results.empty() && "invalid trivial extract");
+  assert(std::is_sorted(Indices.begin(), Indices.end()) &&
+         "extract offsets must be in ascending order");
 
   auto MIB = BuildMI(getMF(), DL, getTII().get(TargetOpcode::G_EXTRACT));
   for (unsigned i = 0; i < ResTys.size(); ++i)
@@ -222,6 +224,8 @@ MachineIRBuilder::buildSequence(LLT ResTy, unsigned Res,
   assert(OpTys.size() == Ops.size() && Ops.size() == Indices.size() &&
          "incompatible args");
   assert(!Ops.empty() && "invalid trivial sequence");
+  assert(std::is_sorted(Indices.begin(), Indices.end()) &&
+         "sequence offsets must be in ascending order");
 
   MachineInstrBuilder MIB = buildInstr(TargetOpcode::G_SEQUENCE,
                                        LLT::scalar(ResTy.getSizeInBits()));
diff --git a/llvm/lib/CodeGen/GlobalISel/MachineLegalizePass.cpp b/llvm/lib/CodeGen/GlobalISel/MachineLegalizePass.cpp
index ba56eb09db12..b0de76d075fb 100644
--- a/llvm/lib/CodeGen/GlobalISel/MachineLegalizePass.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/MachineLegalizePass.cpp
@@ -19,6 +19,7 @@
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/TargetPassConfig.h"
 #include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetInstrInfo.h"
 #include "llvm/Target/TargetSubtargetInfo.h"
 
 #define DEBUG_TYPE "legalize-mir"
@@ -46,6 +47,97 @@ void MachineLegalizePass::getAnalysisUsage(AnalysisUsage &AU) const {
 void MachineLegalizePass::init(MachineFunction &MF) {
 }
 
+/// Try to fold the G_EXTRACT \p MI into the G_SEQUENCE defining its source.
+///
+/// Every extracted value whose bit offset and type exactly match one of the
+/// G_SEQUENCE's inputs has all of its uses rewritten to that input. If every
+/// result of \p MI was matched, \p MI is erased, along with the G_SEQUENCE
+/// when that leaves it without uses.
+///
+/// Both instructions are walked in lock-step, so this relies on their offsets
+/// being in ascending order.
+///
+/// \return true if any operand was rewritten or any instruction erased.
+bool MachineLegalizePass::combineExtracts(MachineInstr &MI,
+                                          MachineRegisterInfo &MRI,
+                                          const TargetInstrInfo &TII) {
+  bool Changed = false;
+  if (MI.getOpcode() != TargetOpcode::G_EXTRACT)
+    return Changed;
+
+  // G_EXTRACT operands are laid out as: NumDefs results, the source register,
+  // then one bit offset per result.
+  unsigned NumDefs = (MI.getNumOperands() - 1) / 2;
+  unsigned SrcReg = MI.getOperand(NumDefs).getReg();
+
+  // Bail out rather than dereference the end iterator if nothing defines the
+  // source.
+  auto SrcDef = MRI.def_instr_begin(SrcReg);
+  if (SrcDef == MRI.def_instr_end())
+    return Changed;
+
+  MachineInstr &SeqI = *SrcDef;
+  if (SeqI.getOpcode() != TargetOpcode::G_SEQUENCE)
+    return Changed;
+
+  // G_SEQUENCE operands are: the result, then (register, bit offset) pairs.
+  unsigned NumSeqSrcs = (SeqI.getNumOperands() - 1) / 2;
+  bool AllDefsReplaced = true;
+
+  // Try to match each register extracted with a corresponding insertion formed
+  // by the G_SEQUENCE.
+  for (unsigned Idx = 0, SeqIdx = 0; Idx < NumDefs; ++Idx) {
+    MachineOperand &ExtractMO = MI.getOperand(Idx);
+    assert(ExtractMO.isReg() && ExtractMO.isDef() &&
+           "unexpected extract operand");
+
+    unsigned ExtractReg = ExtractMO.getReg();
+    unsigned ExtractPos = MI.getOperand(NumDefs + Idx + 1).getImm();
+
+    // Skip the G_SEQUENCE inputs that start below this extract's offset.
+    while (SeqIdx < NumSeqSrcs &&
+           SeqI.getOperand(2 * SeqIdx + 2).getImm() < ExtractPos)
+      ++SeqIdx;
+
+    // Only an input at exactly the same offset and of the same type can stand
+    // in for the extracted value.
+    if (SeqIdx == NumSeqSrcs ||
+        SeqI.getOperand(2 * SeqIdx + 2).getImm() != ExtractPos ||
+        SeqI.getType(SeqIdx + 1) != MI.getType(Idx)) {
+      AllDefsReplaced = false;
+      continue;
+    }
+
+    unsigned OrigReg = SeqI.getOperand(2 * SeqIdx + 1).getReg();
+    assert(!TargetRegisterInfo::isPhysicalRegister(OrigReg) &&
+           "unexpected physical register in G_SEQUENCE");
+
+    // Finally we can replace the uses. setReg() moves the operand off
+    // ExtractReg's use list, so advance the iterator before rewriting;
+    // advancing afterwards would leave the remaining uses untouched.
+    for (auto UI = MRI.use_begin(ExtractReg), UE = MRI.use_end(); UI != UE;) {
+      MachineOperand &Use = *UI++;
+      Use.setReg(OrigReg);
+      Changed = true;
+    }
+  }
+
+  if (AllDefsReplaced) {
+    // If SeqI was the next instruction in the BB and we removed it, we'd break
+    // the outer iteration.
+    assert(std::next(MachineBasicBlock::iterator(MI)) != SeqI &&
+           "G_SEQUENCE does not dominate G_EXTRACT");
+
+    MI.eraseFromParent();
+
+    if (MRI.use_empty(SrcReg))
+      SeqI.eraseFromParent();
+    Changed = true;
+  }
+
+  return Changed;
+}
+
 bool MachineLegalizePass::runOnMachineFunction(MachineFunction &MF) {
   // If the ISel pipeline failed, do not bother running that pass.
   if (MF.getProperties().hasProperty(
@@ -94,5 +186,19 @@ bool MachineLegalizePass::runOnMachineFunction(MachineFunction &MF) {
 
       Changed |= Res == MachineLegalizeHelper::Legalized;
     }
+
+
+  MachineRegisterInfo &MRI = MF.getRegInfo();
+  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
+  for (auto &MBB : MF) {
+    for (auto MI = MBB.begin(); MI != MBB.end(); MI = NextMI) {
+      // Get the next Instruction before we try to legalize, because there's a
+      // good chance MI will be deleted.
+      NextMI = std::next(MI);
+
+      Changed |= combineExtracts(*MI, MRI, TII);
+    }
+  }
+
   return Changed;
 }
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-add.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-add.mir
index d1d959976d82..c328a7071df8 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-add.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-add.mir
@@ -33,13 +33,16 @@ body: |
   bb.0.entry:
     liveins: %x0, %x1, %x2, %x3
     ; CHECK-LABEL: name: test_scalar_add_big
-    ; CHECK-DAG: [[LHS_LO:%.*]](64), [[LHS_HI:%.*]](64) = G_EXTRACT { s64, s64, s128 } %4, 0, 64
-    ; CHECK-DAG: [[RHS_LO:%.*]](64), [[RHS_HI:%.*]](64) = G_EXTRACT { s64, s64, s128 } %5, 0, 64
+    ; CHECK-NOT: G_EXTRACT
+    ; CHECK-NOT: G_SEQUENCE
     ; CHECK-DAG: [[CARRY0_32:%.*]](32) = G_CONSTANT s32 0
     ; CHECK-DAG: [[CARRY0:%[0-9]+]](1) = G_TRUNC { s1, s32 } [[CARRY0_32]]
-    ; CHECK: [[RES_LO:%.*]](64), [[CARRY:%.*]](1) = G_UADDE s64 [[LHS_LO]], [[RHS_LO]], [[CARRY0]]
-    ; CHECK: [[RES_HI:%.*]](64), {{%.*}}(1) = G_UADDE s64 [[LHS_HI]], [[RHS_HI]], [[CARRY]]
-    ; CHECK: %6(128) = G_SEQUENCE { s128, s64, s64 } [[RES_LO]], 0, [[RES_HI]], 64
+    ; CHECK: [[RES_LO:%.*]](64), [[CARRY:%.*]](1) = G_UADDE s64 %0, %2, [[CARRY0]]
+    ; CHECK: [[RES_HI:%.*]](64), {{%.*}}(1) = G_UADDE s64 %1, %3, [[CARRY]]
+    ; CHECK-NOT: G_EXTRACT
+    ; CHECK-NOT: G_SEQUENCE
+    ; CHECK: %x0 = COPY [[RES_LO]]
+    ; CHECK: %x1 = COPY [[RES_HI]]
 
     %0(64) = COPY %x0
     %1(64) = COPY %x1
@@ -93,11 +96,14 @@ body: |
   bb.0.entry:
     liveins: %q0, %q1, %q2, %q3
     ; CHECK-LABEL: name: test_vector_add
-    ; CHECK-DAG: [[LHS_LO:%.*]](128), [[LHS_HI:%.*]](128) = G_EXTRACT { s128, s128, s256 } %4, 0, 128
-    ; CHECK-DAG: [[RHS_LO:%.*]](128), [[RHS_HI:%.*]](128) = G_EXTRACT { s128, s128, s256 } %5, 0, 128
-    ; CHECK: [[RES_LO:%.*]](128) = G_ADD <2 x s64> [[LHS_LO]], [[RHS_LO]]
-    ; CHECK: [[RES_HI:%.*]](128) = G_ADD <2 x s64> [[LHS_HI]], [[RHS_HI]]
-    ; CHECK: %6(256) = G_SEQUENCE { s256, s128, s128 } [[RES_LO]], 0, [[RES_HI]], 128
+    ; CHECK-NOT: G_EXTRACT
+    ; CHECK-NOT: G_SEQUENCE
+    ; CHECK: [[RES_LO:%.*]](128) = G_ADD <2 x s64> %0, %2
+    ; CHECK: [[RES_HI:%.*]](128) = G_ADD <2 x s64> %1, %3
+    ; CHECK-NOT: G_EXTRACT
+    ; CHECK-NOT: G_SEQUENCE
+    ; CHECK: %q0 = COPY [[RES_LO]]
+    ; CHECK: %q1 = COPY [[RES_HI]]
 
     %0(128) = COPY %q0
     %1(128) = COPY %q1
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-combines.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-combines.mir
new file mode 100644
index 000000000000..87137fb3eb79
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-combines.mir
@@ -0,0 +1,103 @@
+# RUN: llc -O0 -run-pass=legalize-mir -global-isel %s -o - 2>&1 | FileCheck %s
+
+--- |
+  target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
+  target triple = "aarch64-apple-ios"
+  define void @test_combines() {
+  entry:
+    ret void
+  }
+...
+
+---
+name: test_combines
+registers:
+  - { id: 0, class: _ }
+  - { id: 1, class: _ }
+  - { id: 2, class: _ }
+  - { id: 3, class: _ }
+  - { id: 4, class: _ }
+  - { id: 5, class: _ }
+  - { id: 6, class: _ }
+  - { id: 7, class: _ }
+  - { id: 8, class: _ }
+  - { id: 9, class: _ }
+  - { id: 10, class: _ }
+  - { id: 11, class: _ }
+  - { id: 12, class: _ }
+  - { id: 13, class: _ }
+  - { id: 14, class: _ }
+  - { id: 15, class: _ }
+  - { id: 16, class: _ }
+  - { id: 17, class: _ }
+  - { id: 18, class: _ }
+  - { id: 19, class: _ }
+  - { id: 20, class: _ }
+  - { id: 21, class: _ }
+  - { id: 22, class: _ }
+  - { id: 23, class: _ }
+  - { id: 24, class: _ }
+  - { id: 25, class: _ }
+  - { id: 26, class: _ }
+  - { id: 27, class: _ }
+body: |
+  bb.0.entry:
+    liveins: %w0, %w1, %x2, %x3
+
+    %0(32) = COPY %w0
+    %1(32) = COPY %w1
+    %2(8) = G_TRUNC { s8, s32 } %0
+
+    ; Only one of these extracts can be eliminated, the offsets don't match
+    ; properly in the other cases.
+    ; CHECK-LABEL: name: test_combines
+    ; CHECK: %3(32) = G_SEQUENCE { s32, s8 } %2, 1
+    ; CHECK: %4(8) = G_EXTRACT { s8, s32 } %3, 0
+    ; CHECK-NOT: G_EXTRACT
+    ; CHECK: %6(8) = G_EXTRACT { s8, s32 } %3, 2
+    ; CHECK: %7(32) = G_ZEXT { s32, s8 } %2
+    %3(32) = G_SEQUENCE { s32, s8 } %2, 1
+    %4(8) = G_EXTRACT { s8, s32 } %3, 0
+    %5(8) = G_EXTRACT { s8, s32 } %3, 1
+    %6(8) = G_EXTRACT { s8, s32 } %3, 2
+    %7(32) = G_ZEXT { s32, s8 } %5
+
+    ; Similarly, here the types don't match.
+    ; CHECK: %10(32) = G_SEQUENCE { s32, s16, s16 } %8, 0, %9, 16
+    ; CHECK: %11(1) = G_EXTRACT { s1, s32 } %10, 0
+    ; CHECK: %12(32) = G_EXTRACT { s32, s32 } %10, 0
+    %8(16) = G_TRUNC { s16, s32 } %0
+    %9(16) = G_ADD s16 %8, %8
+    %10(32) = G_SEQUENCE { s32, s16, s16 } %8, 0, %9, 16
+    %11(1) = G_EXTRACT { s1, s32 } %10, 0
+    %12(32) = G_EXTRACT { s32, s32 } %10, 0
+
+    ; CHECK-NOT: G_EXTRACT
+    ; CHECK: %15(16) = G_ADD s16 %8, %9
+    %13(16), %14(16) = G_EXTRACT { s16, s16, s32 } %10, 0, 16
+    %15(16) = G_ADD s16 %13, %14
+
+    ; CHECK: %18(64) = G_EXTRACT { <2 x s32>, s128 } %17, 0
+    ; CHECK: %19(64) = G_ADD <2 x s32> %18, %18
+    %16(64) = COPY %x0
+    %17(128) = G_SEQUENCE { s128, s64, s64 } %16, 0, %16, 64
+    %18(64) = G_EXTRACT { <2 x s32>, s128 } %17, 0
+    %19(64) = G_ADD <2 x s32> %18, %18
+
+    ; CHECK-NOT: G_SEQUENCE
+    ; CHECK-NOT: G_EXTRACT
+    ; CHECK: %24(32) = G_ADD s32 %0, %20
+    %20(32) = G_ADD s32 %0, %0
+    %21(64) = G_SEQUENCE { s64, s32, s32 } %0, 0, %20, 32
+    %22(32) = G_EXTRACT { s32, s64 } %21, 0
+    %23(32) = G_EXTRACT { s32, s64 } %21, 32
+    %24(32) = G_ADD s32 %22, %23
+
+    ; An extracted value with several uses must have every use rewritten.
+    ; CHECK-NOT: G_SEQUENCE
+    ; CHECK-NOT: G_EXTRACT
+    ; CHECK: %27(32) = G_ADD s32 %0, %0
+    %25(64) = G_SEQUENCE { s64, s32, s32 } %0, 0, %1, 32
+    %26(32) = G_EXTRACT { s32, s64 } %25, 0
+    %27(32) = G_ADD s32 %26, %26
+...