forked from OSchip/llvm-project
GlobalISel: combine extracts & sequences created for legalization
Legalization ends up creating many G_SEQUENCE/G_EXTRACT pairs, which leads to inefficient codegen (even for -O0), so add a quick pass over the function to remove them again. llvm-svn: 280155
This commit is contained in:
parent
a609e2d5ce
commit
991b12bf09
|
@ -293,6 +293,7 @@ public:
|
|||
/// Idxs[0] + N)` of \p Src and similarly for subsequent bit-indexes.
|
||||
///
|
||||
/// \pre setBasicBlock or setMI must have been called.
|
||||
/// \pre \p Indices must be in ascending order of bit position.
|
||||
///
|
||||
/// \return a MachineInstrBuilder for the newly created instruction.
|
||||
MachineInstrBuilder buildExtract(ArrayRef<LLT> ResTys,
|
||||
|
@ -311,7 +312,7 @@ public:
|
|||
/// destination register.
|
||||
/// \pre The bits defined by each Op (derived from index and scalar size) must
|
||||
/// not overlap.
|
||||
/// \pre Each source operand must have a
|
||||
/// \pre \p Indices must be in ascending order of bit position.
|
||||
///
|
||||
/// \return a MachineInstrBuilder for the newly created instruction.
|
||||
MachineInstrBuilder buildSequence(LLT ResTy, unsigned Res,
|
||||
|
|
|
@ -26,6 +26,8 @@
|
|||
|
||||
namespace llvm {
|
||||
|
||||
class MachineRegisterInfo;
|
||||
|
||||
class MachineLegalizePass : public MachineFunctionPass {
|
||||
public:
|
||||
static char ID;
|
||||
|
@ -55,6 +57,9 @@ public:
|
|||
MachineFunctionProperties::Property::Legalized);
|
||||
}
|
||||
|
||||
bool combineExtracts(MachineInstr &MI, MachineRegisterInfo &MRI,
|
||||
const TargetInstrInfo &TII);
|
||||
|
||||
bool runOnMachineFunction(MachineFunction &MF) override;
|
||||
};
|
||||
} // End namespace llvm.
|
||||
|
|
|
@ -193,6 +193,8 @@ MachineInstrBuilder MachineIRBuilder::buildExtract(ArrayRef<LLT> ResTys,
|
|||
assert(ResTys.size() == Results.size() && Results.size() == Indices.size() &&
|
||||
"inconsistent number of regs");
|
||||
assert(!Results.empty() && "invalid trivial extract");
|
||||
assert(std::is_sorted(Indices.begin(), Indices.end()) &&
|
||||
"extract offsets must be in ascending order");
|
||||
|
||||
auto MIB = BuildMI(getMF(), DL, getTII().get(TargetOpcode::G_EXTRACT));
|
||||
for (unsigned i = 0; i < ResTys.size(); ++i)
|
||||
|
@ -222,6 +224,8 @@ MachineIRBuilder::buildSequence(LLT ResTy, unsigned Res,
|
|||
assert(OpTys.size() == Ops.size() && Ops.size() == Indices.size() &&
|
||||
"incompatible args");
|
||||
assert(!Ops.empty() && "invalid trivial sequence");
|
||||
assert(std::is_sorted(Indices.begin(), Indices.end()) &&
|
||||
"sequence offsets must be in ascending order");
|
||||
|
||||
MachineInstrBuilder MIB =
|
||||
buildInstr(TargetOpcode::G_SEQUENCE, LLT::scalar(ResTy.getSizeInBits()));
|
||||
|
|
|
@ -19,6 +19,7 @@
|
|||
#include "llvm/CodeGen/MachineRegisterInfo.h"
|
||||
#include "llvm/CodeGen/TargetPassConfig.h"
|
||||
#include "llvm/Support/Debug.h"
|
||||
#include "llvm/Target/TargetInstrInfo.h"
|
||||
#include "llvm/Target/TargetSubtargetInfo.h"
|
||||
|
||||
#define DEBUG_TYPE "legalize-mir"
|
||||
|
@ -46,6 +47,70 @@ void MachineLegalizePass::getAnalysisUsage(AnalysisUsage &AU) const {
|
|||
void MachineLegalizePass::init(MachineFunction &MF) {
|
||||
}
|
||||
|
||||
/// Try to forward the sources of a G_SEQUENCE directly to the users of a
/// G_EXTRACT that reads the G_SEQUENCE's result.
///
/// For each value defined by \p MI, look for a G_SEQUENCE source that was
/// inserted at exactly the same bit offset and with the same type. When one is
/// found, every use of the extracted register is rewritten to use that source
/// register instead. If all defs of \p MI could be forwarded, \p MI is erased,
/// and the G_SEQUENCE is erased as well once its result has no uses left.
///
/// \param MI  instruction to inspect; anything but a G_EXTRACT is ignored.
/// \param MRI register info used to find the source's def and rewrite uses.
/// \param TII not used by this function.
///
/// \return true if any use was rewritten or any instruction was erased.
bool MachineLegalizePass::combineExtracts(MachineInstr &MI,
                                          MachineRegisterInfo &MRI,
                                          const TargetInstrInfo &TII) {
  bool Changed = false;
  if (MI.getOpcode() != TargetOpcode::G_EXTRACT)
    return Changed;

  // G_EXTRACT operands: NumDefs defs, then the source register, then one
  // immediate bit offset per def.
  unsigned NumDefs = (MI.getNumOperands() - 1) / 2;
  unsigned SrcReg = MI.getOperand(NumDefs).getReg();

  // Do not dereference the def iterator blindly: a source without any defining
  // instruction simply cannot be combined.
  auto SrcDef = MRI.def_instr_begin(SrcReg);
  if (SrcDef == MRI.def_instr_end())
    return Changed;

  MachineInstr &SeqI = *SrcDef;
  if (SeqI.getOpcode() != TargetOpcode::G_SEQUENCE)
    return Changed;

  // G_SEQUENCE operands: the result, then (source register, bit offset) pairs.
  unsigned NumSeqSrcs = (SeqI.getNumOperands() - 1) / 2;
  bool AllDefsReplaced = true;

  // Try to match each register extracted with a corresponding insertion formed
  // by the G_SEQUENCE. Both offset lists are walked in lock-step, so SeqIdx is
  // never rewound between extracted values.
  for (unsigned Idx = 0, SeqIdx = 0; Idx < NumDefs; ++Idx) {
    MachineOperand &ExtractMO = MI.getOperand(Idx);
    assert(ExtractMO.isReg() && ExtractMO.isDef() &&
           "unexpected extract operand");

    unsigned ExtractReg = ExtractMO.getReg();
    unsigned ExtractPos = MI.getOperand(NumDefs + Idx + 1).getImm();

    // Skip the sequence sources that start before the extracted position.
    while (SeqIdx < NumSeqSrcs &&
           SeqI.getOperand(2 * SeqIdx + 2).getImm() < ExtractPos)
      ++SeqIdx;

    // Only an exact match on both the offset and the type can be forwarded.
    if (SeqIdx == NumSeqSrcs ||
        SeqI.getOperand(2 * SeqIdx + 2).getImm() != ExtractPos ||
        SeqI.getType(SeqIdx + 1) != MI.getType(Idx)) {
      AllDefsReplaced = false;
      continue;
    }

    unsigned OrigReg = SeqI.getOperand(2 * SeqIdx + 1).getReg();
    assert(!TargetRegisterInfo::isPhysicalRegister(OrigReg) &&
           "unexpected physical register in G_SEQUENCE");

    // Finally we can replace the uses. setReg() moves the operand from
    // ExtractReg's use list onto OrigReg's, so the iterator has to be advanced
    // *before* the operand is rewritten; otherwise the walk would continue on
    // the wrong list and leave uses of ExtractReg behind.
    for (auto UI = MRI.use_begin(ExtractReg), UE = MRI.use_end(); UI != UE;) {
      MachineOperand &Use = *UI;
      ++UI;
      Use.setReg(OrigReg);
      Changed = true;
    }
  }

  if (AllDefsReplaced) {
    // If SeqI was the next instruction in the BB and we removed it, we'd break
    // the outer iteration.
    assert(std::next(MachineBasicBlock::iterator(MI)) != SeqI &&
           "G_SEQUENCE does not dominate G_EXTRACT");

    MI.eraseFromParent();

    // The G_SEQUENCE may still feed other instructions; only drop it when
    // nothing reads its result any more.
    if (MRI.use_empty(SrcReg))
      SeqI.eraseFromParent();
    Changed = true;
  }

  return Changed;
}
|
||||
|
||||
bool MachineLegalizePass::runOnMachineFunction(MachineFunction &MF) {
|
||||
// If the ISel pipeline failed, do not bother running that pass.
|
||||
if (MF.getProperties().hasProperty(
|
||||
|
@ -94,5 +159,19 @@ bool MachineLegalizePass::runOnMachineFunction(MachineFunction &MF) {
|
|||
|
||||
Changed |= Res == MachineLegalizeHelper::Legalized;
|
||||
}
|
||||
|
||||
|
||||
MachineRegisterInfo &MRI = MF.getRegInfo();
|
||||
const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
|
||||
for (auto &MBB : MF) {
|
||||
for (auto MI = MBB.begin(); MI != MBB.end(); MI = NextMI) {
|
||||
// Get the next Instruction before we try to legalize, because there's a
|
||||
// good chance MI will be deleted.
|
||||
NextMI = std::next(MI);
|
||||
|
||||
Changed |= combineExtracts(*MI, MRI, TII);
|
||||
}
|
||||
}
|
||||
|
||||
return Changed;
|
||||
}
|
||||
|
|
|
@ -33,13 +33,16 @@ body: |
|
|||
bb.0.entry:
|
||||
liveins: %x0, %x1, %x2, %x3
|
||||
; CHECK-LABEL: name: test_scalar_add_big
|
||||
; CHECK-DAG: [[LHS_LO:%.*]](64), [[LHS_HI:%.*]](64) = G_EXTRACT { s64, s64, s128 } %4, 0, 64
|
||||
; CHECK-DAG: [[RHS_LO:%.*]](64), [[RHS_HI:%.*]](64) = G_EXTRACT { s64, s64, s128 } %5, 0, 64
|
||||
; CHECK-NOT: G_EXTRACT
|
||||
; CHECK-NOT: G_SEQUENCE
|
||||
; CHECK-DAG: [[CARRY0_32:%.*]](32) = G_CONSTANT s32 0
|
||||
; CHECK-DAG: [[CARRY0:%[0-9]+]](1) = G_TRUNC { s1, s32 } [[CARRY0_32]]
|
||||
; CHECK: [[RES_LO:%.*]](64), [[CARRY:%.*]](1) = G_UADDE s64 [[LHS_LO]], [[RHS_LO]], [[CARRY0]]
|
||||
; CHECK: [[RES_HI:%.*]](64), {{%.*}}(1) = G_UADDE s64 [[LHS_HI]], [[RHS_HI]], [[CARRY]]
|
||||
; CHECK: %6(128) = G_SEQUENCE { s128, s64, s64 } [[RES_LO]], 0, [[RES_HI]], 64
|
||||
; CHECK: [[RES_LO:%.*]](64), [[CARRY:%.*]](1) = G_UADDE s64 %0, %2, [[CARRY0]]
|
||||
; CHECK: [[RES_HI:%.*]](64), {{%.*}}(1) = G_UADDE s64 %1, %3, [[CARRY]]
|
||||
; CHECK-NOT: G_EXTRACT
|
||||
; CHECK-NOT: G_SEQUENCE
|
||||
; CHECK: %x0 = COPY [[RES_LO]]
|
||||
; CHECK: %x1 = COPY [[RES_HI]]
|
||||
|
||||
%0(64) = COPY %x0
|
||||
%1(64) = COPY %x1
|
||||
|
@ -93,11 +96,14 @@ body: |
|
|||
bb.0.entry:
|
||||
liveins: %q0, %q1, %q2, %q3
|
||||
; CHECK-LABEL: name: test_vector_add
|
||||
; CHECK-DAG: [[LHS_LO:%.*]](128), [[LHS_HI:%.*]](128) = G_EXTRACT { s128, s128, s256 } %4, 0, 128
|
||||
; CHECK-DAG: [[RHS_LO:%.*]](128), [[RHS_HI:%.*]](128) = G_EXTRACT { s128, s128, s256 } %5, 0, 128
|
||||
; CHECK: [[RES_LO:%.*]](128) = G_ADD <2 x s64> [[LHS_LO]], [[RHS_LO]]
|
||||
; CHECK: [[RES_HI:%.*]](128) = G_ADD <2 x s64> [[LHS_HI]], [[RHS_HI]]
|
||||
; CHECK: %6(256) = G_SEQUENCE { s256, s128, s128 } [[RES_LO]], 0, [[RES_HI]], 128
|
||||
; CHECK-NOT: G_EXTRACT
|
||||
; CHECK-NOT: G_SEQUENCE
|
||||
; CHECK: [[RES_LO:%.*]](128) = G_ADD <2 x s64> %0, %2
|
||||
; CHECK: [[RES_HI:%.*]](128) = G_ADD <2 x s64> %1, %3
|
||||
; CHECK-NOT: G_EXTRACT
|
||||
; CHECK-NOT: G_SEQUENCE
|
||||
; CHECK: %q0 = COPY [[RES_LO]]
|
||||
; CHECK: %q1 = COPY [[RES_HI]]
|
||||
|
||||
%0(128) = COPY %q0
|
||||
%1(128) = COPY %q1
|
||||
|
|
|
@ -0,0 +1,92 @@
|
|||
# RUN: llc -O0 -run-pass=legalize-mir -global-isel %s -o - 2>&1 | FileCheck %s
|
||||
|
||||
--- |
|
||||
target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
|
||||
target triple = "aarch64-apple-ios"
|
||||
define void @test_combines() {
|
||||
entry:
|
||||
ret void
|
||||
}
|
||||
...
|
||||
|
||||
---
|
||||
name: test_combines
|
||||
registers:
|
||||
- { id: 0, class: _ }
|
||||
- { id: 1, class: _ }
|
||||
- { id: 2, class: _ }
|
||||
- { id: 3, class: _ }
|
||||
- { id: 4, class: _ }
|
||||
- { id: 5, class: _ }
|
||||
- { id: 6, class: _ }
|
||||
- { id: 7, class: _ }
|
||||
- { id: 8, class: _ }
|
||||
- { id: 9, class: _ }
|
||||
- { id: 10, class: _ }
|
||||
- { id: 11, class: _ }
|
||||
- { id: 12, class: _ }
|
||||
- { id: 13, class: _ }
|
||||
- { id: 14, class: _ }
|
||||
- { id: 15, class: _ }
|
||||
- { id: 16, class: _ }
|
||||
- { id: 17, class: _ }
|
||||
- { id: 18, class: _ }
|
||||
- { id: 19, class: _ }
|
||||
- { id: 20, class: _ }
|
||||
- { id: 21, class: _ }
|
||||
- { id: 22, class: _ }
|
||||
- { id: 23, class: _ }
|
||||
- { id: 24, class: _ }
|
||||
body: |
|
||||
bb.0.entry:
|
||||
liveins: %w0, %w1, %x2, %x3
|
||||
|
||||
%0(32) = COPY %w0
|
||||
%1(32) = COPY %w1
|
||||
%2(8) = G_TRUNC { s8, s32 } %0
|
||||
|
||||
; Only one of these extracts can be eliminated, the offsets don't match
|
||||
; properly in the other cases.
|
||||
; CHECK-LABEL: name: test_combines
|
||||
; CHECK: %3(32) = G_SEQUENCE { s32, s8 } %2, 1
|
||||
; CHECK: %4(8) = G_EXTRACT { s8, s32 } %3, 0
|
||||
; CHECK-NOT: G_EXTRACT
|
||||
; CHECK: %6(8) = G_EXTRACT { s8, s32 } %3, 2
|
||||
; CHECK: %7(32) = G_ZEXT { s32, s8 } %2
|
||||
%3(32) = G_SEQUENCE { s32, s8 } %2, 1
|
||||
%4(8) = G_EXTRACT { s8, s32 } %3, 0
|
||||
%5(8) = G_EXTRACT { s8, s32 } %3, 1
|
||||
%6(8) = G_EXTRACT { s8, s32 } %3, 2
|
||||
%7(32) = G_ZEXT { s32, s8 } %5
|
||||
|
||||
; Similarly, here the types don't match.
|
||||
; CHECK: %10(32) = G_SEQUENCE { s32, s16, s16 } %8, 0, %9, 16
|
||||
; CHECK: %11(1) = G_EXTRACT { s1, s32 } %10, 0
|
||||
; CHECK: %12(32) = G_EXTRACT { s32, s32 } %10, 0
|
||||
%8(16) = G_TRUNC { s16, s32 } %0
|
||||
%9(16) = G_ADD s16 %8, %8
|
||||
%10(32) = G_SEQUENCE { s32, s16, s16 } %8, 0, %9, 16
|
||||
%11(1) = G_EXTRACT { s1, s32 } %10, 0
|
||||
%12(32) = G_EXTRACT { s32, s32 } %10, 0
|
||||
|
||||
; CHECK-NOT: G_EXTRACT
|
||||
; CHECK: %15(16) = G_ADD s16 %8, %9
|
||||
%13(16), %14(16) = G_EXTRACT { s16, s16, s32 } %10, 0, 16
|
||||
%15(16) = G_ADD s16 %13, %14
|
||||
|
||||
; CHECK: %18(64) = G_EXTRACT { <2 x s32>, s128 } %17, 0
|
||||
; CHECK: %19(64) = G_ADD <2 x s32> %18, %18
|
||||
%16(64) = COPY %x0
|
||||
%17(128) = G_SEQUENCE { s128, s64, s64 } %16, 0, %16, 64
|
||||
%18(64) = G_EXTRACT { <2 x s32>, s128 } %17, 0
|
||||
%19(64) = G_ADD <2 x s32> %18, %18
|
||||
|
||||
; CHECK-NOT: G_SEQUENCE
|
||||
; CHECK-NOT: G_EXTRACT
|
||||
; CHECK: %24(32) = G_ADD s32 %0, %20
|
||||
%20(32) = G_ADD s32 %0, %0
|
||||
%21(64) = G_SEQUENCE { s64, s32, s32 } %0, 0, %20, 32
|
||||
%22(32) = G_EXTRACT { s32, s64 } %21, 0
|
||||
%23(32) = G_EXTRACT { s32, s64 } %21, 32
|
||||
%24(32) = G_ADD s32 %22, %23
|
||||
...
|
Loading…
Reference in New Issue