[GlobalISel][AArch64] Add support for base register + offset register loads
Add support for folding G_GEPs into loads of the form

```
ldr reg, [base, off]
```

when possible. This can save an add before the load. Currently, this is only
supported for loads of 64 bits into 64-bit registers.

Add a new addressing mode function, `selectAddrModeRegisterOffset`, which
performs this folding when it is profitable.

Also add a test for addressing modes for G_LOAD.

Differential Revision: https://reviews.llvm.org/D64944

llvm-svn: 366503
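As a rough illustration of the saving (register names chosen arbitrarily for this sketch, not taken from the patch), the fold turns an explicit address computation followed by a load into a single register-offset load:

```
// Without the fold: the G_GEP is selected on its own, costing an extra add.
add x8, x0, x1
ldr x0, [x8]

// With the fold: the offset register is absorbed into the load's addressing mode.
ldr x0, [x0, x1]
```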
llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp:

```
@@ -67,6 +67,7 @@ private:
  bool earlySelect(MachineInstr &I) const;

  bool earlySelectSHL(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool earlySelectLoad(MachineInstr &I, MachineRegisterInfo &MRI) const;

  bool selectVaStartAAPCS(MachineInstr &I, MachineFunction &MF,
                          MachineRegisterInfo &MRI) const;
@@ -182,6 +183,7 @@ private:
  ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root) const {
    return selectAddrModeIndexed(Root, Width / 8);
  }
  ComplexRendererFns selectAddrModeRegisterOffset(MachineOperand &Root) const;

  void renderTruncImm(MachineInstrBuilder &MIB, const MachineInstr &MI) const;

@@ -1158,6 +1160,57 @@ bool AArch64InstructionSelector::earlySelectSHL(
  return constrainSelectedInstRegOperands(*NewI, TII, TRI, RBI);
}

bool AArch64InstructionSelector::earlySelectLoad(
    MachineInstr &I, MachineRegisterInfo &MRI) const {
  // Try to fold in shifts, etc into the addressing mode of a load.
  assert(I.getOpcode() == TargetOpcode::G_LOAD && "unexpected op");

  // Don't handle atomic loads/stores yet.
  auto &MemOp = **I.memoperands_begin();
  if (MemOp.getOrdering() != AtomicOrdering::NotAtomic) {
    LLVM_DEBUG(dbgs() << "Atomic load/store not supported yet\n");
    return false;
  }

  unsigned MemBytes = MemOp.getSize();

  // Only support 64-bit loads for now.
  if (MemBytes != 8)
    return false;

  Register DstReg = I.getOperand(0).getReg();
  const LLT DstTy = MRI.getType(DstReg);
  // Don't handle vectors.
  if (DstTy.isVector())
    return false;

  unsigned DstSize = DstTy.getSizeInBits();
  // TODO: 32-bit destinations.
  if (DstSize != 64)
    return false;

  // Check if we can do any folding from GEPs etc. into the load.
  auto ImmFn = selectAddrModeRegisterOffset(I.getOperand(1));
  if (!ImmFn)
    return false;

  // We can fold something. Emit the load here.
  MachineIRBuilder MIB(I);

  // Choose the instruction based off the size of the element being loaded, and
  // whether or not we're loading into a FPR.
  const RegisterBank &RB = *RBI.getRegBank(DstReg, MRI, TRI);
  unsigned Opc =
      RB.getID() == AArch64::GPRRegBankID ? AArch64::LDRXroX : AArch64::LDRDroX;
  // Construct the load.
  auto LoadMI = MIB.buildInstr(Opc, {DstReg}, {});
  for (auto &RenderFn : *ImmFn)
    RenderFn(LoadMI);
  LoadMI.addMemOperand(*I.memoperands_begin());
  I.eraseFromParent();
  return constrainSelectedInstRegOperands(*LoadMI, TII, TRI, RBI);
}

bool AArch64InstructionSelector::earlySelect(MachineInstr &I) const {
  assert(I.getParent() && "Instruction should be in a basic block!");
  assert(I.getParent()->getParent() && "Instruction should be in a function!");
@@ -1169,6 +1222,8 @@ bool AArch64InstructionSelector::earlySelect(MachineInstr &I) const {
  switch (I.getOpcode()) {
  case TargetOpcode::G_SHL:
    return earlySelectSHL(I, MRI);
  case TargetOpcode::G_LOAD:
    return earlySelectLoad(I, MRI);
  default:
    return false;
  }
@@ -3891,6 +3946,44 @@ AArch64InstructionSelector::selectArithImmed(MachineOperand &Root) const {
  }};
}

/// This is used for computing addresses like this:
///
/// ldr x1, [x2, x3]
///
/// Where x2 is the base register, and x3 is an offset register.
///
/// When possible (or profitable) to fold a G_GEP into the address calculation,
/// this will do so. Otherwise, it will return None.
InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectAddrModeRegisterOffset(
    MachineOperand &Root) const {
  MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();

  // If we have a constant offset, then we probably don't want to match a
  // register offset.
  if (isBaseWithConstantOffset(Root, MRI))
    return None;

  // We need a GEP.
  MachineInstr *Gep = MRI.getVRegDef(Root.getReg());
  if (!Gep || Gep->getOpcode() != TargetOpcode::G_GEP)
    return None;

  // If this is used more than once, let's not bother folding.
  // TODO: Check if they are memory ops. If they are, then we can still fold
  // without having to recompute anything.
  if (!MRI.hasOneUse(Gep->getOperand(0).getReg()))
    return None;

  // Base is the GEP's LHS, offset is its RHS.
  return {{
      [=](MachineInstrBuilder &MIB) { MIB.add(Gep->getOperand(1)); },
      [=](MachineInstrBuilder &MIB) { MIB.add(Gep->getOperand(2)); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },
  }};
}

/// Select a "register plus unscaled signed 9-bit immediate" address. This
/// should only match when there is an offset that is not valid for a scaled
/// immediate addressing mode. The "Size" argument is the size in bytes of the
```
New file: MIR test for G_LOAD addressing modes.

```
@@ -0,0 +1,90 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=aarch64-unknown-unknown -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s

--- |
  define void @ldrxrox_breg_oreg(i64* %addr) { ret void }
  define void @ldrdrox_breg_oreg(i64* %addr) { ret void }
  define void @more_than_one_use(i64* %addr) { ret void }
...

---
name: ldrxrox_breg_oreg
alignment: 2
legalized: true
regBankSelected: true
tracksRegLiveness: true
machineFunctionInfo: {}
body: |
  bb.0:
    liveins: $x0, $x1

    ; CHECK-LABEL: name: ldrxrox_breg_oreg
    ; CHECK: liveins: $x0, $x1
    ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
    ; CHECK: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1
    ; CHECK: [[LDRXroX:%[0-9]+]]:gpr64 = LDRXroX [[COPY]], [[COPY1]], 0, 0 :: (load 8 from %ir.addr)
    ; CHECK: $x0 = COPY [[LDRXroX]]
    ; CHECK: RET_ReallyLR implicit $x0
    %0:gpr(p0) = COPY $x0
    %1:gpr(s64) = COPY $x1
    %2:gpr(p0) = G_GEP %0, %1
    %4:gpr(s64) = G_LOAD %2(p0) :: (load 8 from %ir.addr)
    $x0 = COPY %4(s64)
    RET_ReallyLR implicit $x0
...

---
name: ldrdrox_breg_oreg
alignment: 2
legalized: true
regBankSelected: true
tracksRegLiveness: true
machineFunctionInfo: {}
body: |
  bb.0:
    liveins: $d0, $x1
    ; CHECK-LABEL: name: ldrdrox_breg_oreg
    ; CHECK: liveins: $d0, $x1
    ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $d0
    ; CHECK: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1
    ; CHECK: [[LDRDroX:%[0-9]+]]:fpr64 = LDRDroX [[COPY]], [[COPY1]], 0, 0 :: (load 8 from %ir.addr)
    ; CHECK: $d0 = COPY [[LDRDroX]]
    ; CHECK: RET_ReallyLR implicit $d0
    %0:gpr(p0) = COPY $d0
    %1:gpr(s64) = COPY $x1
    %2:gpr(p0) = G_GEP %0, %1
    %4:fpr(s64) = G_LOAD %2(p0) :: (load 8 from %ir.addr)
    $d0 = COPY %4(s64)
    RET_ReallyLR implicit $d0
...

---
name: more_than_one_use
alignment: 2
legalized: true
regBankSelected: true
tracksRegLiveness: true
machineFunctionInfo: {}
body: |
  bb.0:
    liveins: $x0, $x1
    ; This shouldn't be folded, since we reuse the result of the G_GEP outside
    ; the G_LOAD
    ; CHECK-LABEL: name: more_than_one_use
    ; CHECK: liveins: $x0, $x1
    ; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
    ; CHECK: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1
    ; CHECK: [[ADDXrr:%[0-9]+]]:gpr64common = ADDXrr [[COPY]], [[COPY1]]
    ; CHECK: [[LDRXui:%[0-9]+]]:gpr64 = LDRXui [[ADDXrr]], 0 :: (load 8 from %ir.addr)
    ; CHECK: [[COPY2:%[0-9]+]]:gpr64 = COPY [[ADDXrr]]
    ; CHECK: [[ADDXrr1:%[0-9]+]]:gpr64 = ADDXrr [[COPY2]], [[LDRXui]]
    ; CHECK: $x0 = COPY [[ADDXrr1]]
    ; CHECK: RET_ReallyLR implicit $x0
    %0:gpr(p0) = COPY $x0
    %1:gpr(s64) = COPY $x1
    %2:gpr(p0) = G_GEP %0, %1
    %4:gpr(s64) = G_LOAD %2(p0) :: (load 8 from %ir.addr)
    %5:gpr(s64) = G_PTRTOINT %2
    %6:gpr(s64) = G_ADD %5, %4
    $x0 = COPY %6(s64)
    RET_ReallyLR implicit $x0
```