[GlobalISel][AArch64] Add support for base register + offset register loads
Add support for folding G_GEPs into loads of the form

```
ldr reg, [base, off]
```

when possible. This can save an add before the load. Currently, this is only supported for loads of 64 bits into 64-bit registers.

Add a new addressing mode function, `selectAddrModeRegisterOffset`, which performs this folding when it is profitable.

Also add a test for addressing modes for G_LOAD.

Differential Revision: https://reviews.llvm.org/D64944

llvm-svn: 366503
commit 7a1dcc5ff1 (parent 50057f3288)
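As an illustration of the transformation (not part of the commit itself): a load whose address is a base pointer plus a variable byte offset previously selected to an explicit add followed by an immediate-offset load; with the G_GEP folded, the offset register feeds the load directly. A C-level sketch with hypothetical names:

```
// Hypothetical example, not taken from the commit: the offset is an unscaled
// register value, so the G_GEP can fold straight into the load.
long load_at(char *base, long off) {
  // before: add x8, x0, x1
  //         ldr x0, [x8]
  // after:  ldr x0, [x0, x1]
  return *(long *)(base + off);
}
```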
@@ -67,6 +67,7 @@ private:
   bool earlySelect(MachineInstr &I) const;

   bool earlySelectSHL(MachineInstr &I, MachineRegisterInfo &MRI) const;
+  bool earlySelectLoad(MachineInstr &I, MachineRegisterInfo &MRI) const;

   bool selectVaStartAAPCS(MachineInstr &I, MachineFunction &MF,
                           MachineRegisterInfo &MRI) const;
@@ -182,6 +183,7 @@ private:
   ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root) const {
     return selectAddrModeIndexed(Root, Width / 8);
   }
+  ComplexRendererFns selectAddrModeRegisterOffset(MachineOperand &Root) const;

   void renderTruncImm(MachineInstrBuilder &MIB, const MachineInstr &MI) const;
@@ -1158,6 +1160,57 @@ bool AArch64InstructionSelector::earlySelectSHL(
   return constrainSelectedInstRegOperands(*NewI, TII, TRI, RBI);
 }

+bool AArch64InstructionSelector::earlySelectLoad(
+    MachineInstr &I, MachineRegisterInfo &MRI) const {
+  // Try to fold in shifts, etc into the addressing mode of a load.
+  assert(I.getOpcode() == TargetOpcode::G_LOAD && "unexpected op");
+
+  // Don't handle atomic loads/stores yet.
+  auto &MemOp = **I.memoperands_begin();
+  if (MemOp.getOrdering() != AtomicOrdering::NotAtomic) {
+    LLVM_DEBUG(dbgs() << "Atomic load/store not supported yet\n");
+    return false;
+  }
+
+  unsigned MemBytes = MemOp.getSize();
+
+  // Only support 64-bit loads for now.
+  if (MemBytes != 8)
+    return false;
+
+  Register DstReg = I.getOperand(0).getReg();
+  const LLT DstTy = MRI.getType(DstReg);
+  // Don't handle vectors.
+  if (DstTy.isVector())
+    return false;
+
+  unsigned DstSize = DstTy.getSizeInBits();
+  // TODO: 32-bit destinations.
+  if (DstSize != 64)
+    return false;
+
+  // Check if we can do any folding from GEPs etc. into the load.
+  auto ImmFn = selectAddrModeRegisterOffset(I.getOperand(1));
+  if (!ImmFn)
+    return false;
+
+  // We can fold something. Emit the load here.
+  MachineIRBuilder MIB(I);
+
+  // Choose the instruction based off the size of the element being loaded, and
+  // whether or not we're loading into a FPR.
+  const RegisterBank &RB = *RBI.getRegBank(DstReg, MRI, TRI);
+  unsigned Opc =
+      RB.getID() == AArch64::GPRRegBankID ? AArch64::LDRXroX : AArch64::LDRDroX;
+  // Construct the load.
+  auto LoadMI = MIB.buildInstr(Opc, {DstReg}, {});
+  for (auto &RenderFn : *ImmFn)
+    RenderFn(LoadMI);
+  LoadMI.addMemOperand(*I.memoperands_begin());
+  I.eraseFromParent();
+  return constrainSelectedInstRegOperands(*LoadMI, TII, TRI, RBI);
+}
+
 bool AArch64InstructionSelector::earlySelect(MachineInstr &I) const {
   assert(I.getParent() && "Instruction should be in a basic block!");
   assert(I.getParent()->getParent() && "Instruction should be in a function!");
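A note on the mechanism above: `selectAddrModeRegisterOffset` returns `ComplexRendererFns`, an optional list of callbacks that each append one operand to the instruction being built, and `earlySelectLoad` replays them onto the `LDRXroX`/`LDRDroX` builder. A minimal standalone analogue of that pattern, using simplified stand-in types rather than LLVM's actual API:

```
#include <functional>
#include <optional>
#include <vector>

// Stand-in for MachineInstrBuilder: collects operands as renderers run.
struct InstrBuilder {
  std::vector<long> Operands;
  void addImm(long Imm) { Operands.push_back(Imm); }
};

using RendererFns =
    std::optional<std::vector<std::function<void(InstrBuilder &)>>>;

// Mirrors the loop in earlySelectLoad: if matching failed there is nothing to
// fold; otherwise each renderer appends one operand to the new instruction.
bool applyRenderers(const RendererFns &Fns, InstrBuilder &MIB) {
  if (!Fns)
    return false; // no fold; fall back to normal selection
  for (auto &Fn : *Fns)
    Fn(MIB);
  return true;
}
```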
@@ -1169,6 +1222,8 @@ bool AArch64InstructionSelector::earlySelect(MachineInstr &I) const {
   switch (I.getOpcode()) {
   case TargetOpcode::G_SHL:
     return earlySelectSHL(I, MRI);
+  case TargetOpcode::G_LOAD:
+    return earlySelectLoad(I, MRI);
   default:
     return false;
   }
@@ -3891,6 +3946,44 @@ AArch64InstructionSelector::selectArithImmed(MachineOperand &Root) const {
   }};
 }

+/// This is used for computing addresses like this:
+///
+/// ldr x1, [x2, x3]
+///
+/// Where x2 is the base register, and x3 is an offset register.
+///
+/// When possible (or profitable) to fold a G_GEP into the address calculation,
+/// this will do so. Otherwise, it will return None.
+InstructionSelector::ComplexRendererFns
+AArch64InstructionSelector::selectAddrModeRegisterOffset(
+    MachineOperand &Root) const {
+  MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
+
+  // If we have a constant offset, then we probably don't want to match a
+  // register offset.
+  if (isBaseWithConstantOffset(Root, MRI))
+    return None;
+
+  // We need a GEP.
+  MachineInstr *Gep = MRI.getVRegDef(Root.getReg());
+  if (!Gep || Gep->getOpcode() != TargetOpcode::G_GEP)
+    return None;
+
+  // If this is used more than once, let's not bother folding.
+  // TODO: Check if they are memory ops. If they are, then we can still fold
+  // without having to recompute anything.
+  if (!MRI.hasOneUse(Gep->getOperand(0).getReg()))
+    return None;
+
+  // Base is the GEP's LHS, offset is its RHS.
+  return {{
+      [=](MachineInstrBuilder &MIB) { MIB.add(Gep->getOperand(1)); },
+      [=](MachineInstrBuilder &MIB) { MIB.add(Gep->getOperand(2)); },
+      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },
+      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },
+  }};
+}
+
 /// Select a "register plus unscaled signed 9-bit immediate" address. This
 /// should only match when there is an offset that is not valid for a scaled
 /// immediate addressing mode. The "Size" argument is the size in bytes of the
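For what it's worth, the four renderers above emit, in order: the G_GEP's base (operand 1), its offset (operand 2), and two zero immediates, which appear to select the unextended, unscaled `[base, offset]` form of the register-offset addressing mode. Continuing the simplified sketch from earlier (again purely illustrative, not LLVM's API):

```
// Purely illustrative success path, using the stand-in types from the sketch
// above: render base, offset, then extend = 0 and shift = 0, i.e. a plain
// ldr dst, [base, offset] with no extension and no scaling.
RendererFns matchRegisterOffset(long BaseReg, long OffReg) {
  std::vector<std::function<void(InstrBuilder &)>> Fns = {
      [=](InstrBuilder &MIB) { MIB.addImm(BaseReg); }, // base register
      [=](InstrBuilder &MIB) { MIB.addImm(OffReg); },  // offset register
      [=](InstrBuilder &MIB) { MIB.addImm(0); },       // extend: none
      [=](InstrBuilder &MIB) { MIB.addImm(0); },       // shift/scale: none
  };
  return Fns;
}
```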
@@ -0,0 +1,90 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=aarch64-unknown-unknown -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s
+
+--- |
+  define void @ldrxrox_breg_oreg(i64* %addr) { ret void }
+  define void @ldrdrox_breg_oreg(i64* %addr) { ret void }
+  define void @more_than_one_use(i64* %addr) { ret void }
+...
+---
+name:            ldrxrox_breg_oreg
+alignment:       2
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo: {}
+body:             |
+  bb.0:
+    liveins: $x0, $x1
+
+    ; CHECK-LABEL: name: ldrxrox_breg_oreg
+    ; CHECK: liveins: $x0, $x1
+    ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
+    ; CHECK: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1
+    ; CHECK: [[LDRXroX:%[0-9]+]]:gpr64 = LDRXroX [[COPY]], [[COPY1]], 0, 0 :: (load 8 from %ir.addr)
+    ; CHECK: $x0 = COPY [[LDRXroX]]
+    ; CHECK: RET_ReallyLR implicit $x0
+    %0:gpr(p0) = COPY $x0
+    %1:gpr(s64) = COPY $x1
+    %2:gpr(p0) = G_GEP %0, %1
+    %4:gpr(s64) = G_LOAD %2(p0) :: (load 8 from %ir.addr)
+    $x0 = COPY %4(s64)
+    RET_ReallyLR implicit $x0
+...
+
+---
+name:            ldrdrox_breg_oreg
+alignment:       2
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo: {}
+body:             |
+  bb.0:
+    liveins: $d0, $x1
+    ; CHECK-LABEL: name: ldrdrox_breg_oreg
+    ; CHECK: liveins: $d0, $x1
+    ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $d0
+    ; CHECK: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1
+    ; CHECK: [[LDRDroX:%[0-9]+]]:fpr64 = LDRDroX [[COPY]], [[COPY1]], 0, 0 :: (load 8 from %ir.addr)
+    ; CHECK: $d0 = COPY [[LDRDroX]]
+    ; CHECK: RET_ReallyLR implicit $d0
+    %0:gpr(p0) = COPY $d0
+    %1:gpr(s64) = COPY $x1
+    %2:gpr(p0) = G_GEP %0, %1
+    %4:fpr(s64) = G_LOAD %2(p0) :: (load 8 from %ir.addr)
+    $d0 = COPY %4(s64)
+    RET_ReallyLR implicit $d0
+...
+
+---
+name:            more_than_one_use
+alignment:       2
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo: {}
+body:             |
+  bb.0:
+    liveins: $x0, $x1
+    ; This shouldn't be folded, since we reuse the result of the G_GEP outside
+    ; the G_LOAD
+    ; CHECK-LABEL: name: more_than_one_use
+    ; CHECK: liveins: $x0, $x1
+    ; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
+    ; CHECK: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1
+    ; CHECK: [[ADDXrr:%[0-9]+]]:gpr64common = ADDXrr [[COPY]], [[COPY1]]
+    ; CHECK: [[LDRXui:%[0-9]+]]:gpr64 = LDRXui [[ADDXrr]], 0 :: (load 8 from %ir.addr)
+    ; CHECK: [[COPY2:%[0-9]+]]:gpr64 = COPY [[ADDXrr]]
+    ; CHECK: [[ADDXrr1:%[0-9]+]]:gpr64 = ADDXrr [[COPY2]], [[LDRXui]]
+    ; CHECK: $x0 = COPY [[ADDXrr1]]
+    ; CHECK: RET_ReallyLR implicit $x0
+    %0:gpr(p0) = COPY $x0
+    %1:gpr(s64) = COPY $x1
+    %2:gpr(p0) = G_GEP %0, %1
+    %4:gpr(s64) = G_LOAD %2(p0) :: (load 8 from %ir.addr)
+    %5:gpr(s64) = G_PTRTOINT %2
+    %6:gpr(s64) = G_ADD %5, %4
+    $x0 = COPY %6(s64)
+    RET_ReallyLR implicit $x0
+...