forked from OSchip/llvm-project
679 lines
24 KiB
C++
679 lines
24 KiB
C++
//===-- X86FixupLEAs.cpp - use or replace LEA instructions -----------===//
|
|
//
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
//
|
|
// This file defines the pass that finds instructions that can be
|
|
// re-written as LEA instructions in order to reduce pipeline delays.
|
|
// It replaces LEAs with ADD/INC/DEC when that is better for size/speed.
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
#include "X86.h"
|
|
#include "X86InstrInfo.h"
|
|
#include "X86Subtarget.h"
|
|
#include "llvm/ADT/Statistic.h"
|
|
#include "llvm/CodeGen/MachineFunctionPass.h"
|
|
#include "llvm/CodeGen/MachineInstrBuilder.h"
|
|
#include "llvm/CodeGen/Passes.h"
|
|
#include "llvm/CodeGen/TargetSchedule.h"
|
|
#include "llvm/Support/Debug.h"
|
|
#include "llvm/Support/raw_ostream.h"
|
|
using namespace llvm;
|
|
|
|
#define FIXUPLEA_DESC "X86 LEA Fixup"
|
|
#define FIXUPLEA_NAME "x86-fixup-LEAs"
|
|
|
|
#define DEBUG_TYPE FIXUPLEA_NAME
|
|
|
|
STATISTIC(NumLEAs, "Number of LEA instructions created");
|
|
|
|
namespace {
|
|
class FixupLEAPass : public MachineFunctionPass {
|
|
enum RegUsageState { RU_NotUsed, RU_Write, RU_Read };
|
|
|
|
/// Given a machine register, look for the instruction
|
|
/// which writes it in the current basic block. If found,
|
|
/// try to replace it with an equivalent LEA instruction.
|
|
/// If replacement succeeds, then also process the newly created
|
|
/// instruction.
|
|
void seekLEAFixup(MachineOperand &p, MachineBasicBlock::iterator &I,
|
|
MachineBasicBlock &MBB);
|
|
|
|
/// Given a memory access or LEA instruction
|
|
/// whose address mode uses a base and/or index register, look for
|
|
/// an opportunity to replace the instruction which sets the base or index
|
|
/// register with an equivalent LEA instruction.
|
|
void processInstruction(MachineBasicBlock::iterator &I,
|
|
MachineBasicBlock &MBB);
|
|
|
|
/// Given a LEA instruction which is unprofitable
|
|
/// on SlowLEA targets try to replace it with an equivalent ADD instruction.
|
|
void processInstructionForSlowLEA(MachineBasicBlock::iterator &I,
|
|
MachineBasicBlock &MBB);
|
|
|
|
/// Given a LEA instruction which is unprofitable
|
|
/// on SNB+ try to replace it with other instructions.
|
|
/// According to Intel's Optimization Reference Manual:
|
|
/// " For LEA instructions with three source operands and some specific
|
|
/// situations, instruction latency has increased to 3 cycles, and must
|
|
/// dispatch via port 1:
|
|
/// - LEA that has all three source operands: base, index, and offset
|
|
/// - LEA that uses base and index registers where the base is EBP, RBP,
|
|
/// or R13
|
|
/// - LEA that uses RIP relative addressing mode
|
|
/// - LEA that uses 16-bit addressing mode "
|
|
/// This function currently handles the first 2 cases only.
|
|
void processInstrForSlow3OpLEA(MachineBasicBlock::iterator &I,
|
|
MachineBasicBlock &MBB, bool OptIncDec);
|
|
|
|
/// Look for LEAs that are really two address LEAs that we might be able to
|
|
/// turn into regular ADD instructions.
|
|
bool optTwoAddrLEA(MachineBasicBlock::iterator &I,
|
|
MachineBasicBlock &MBB, bool OptIncDec,
|
|
bool UseLEAForSP) const;
|
|
|
|
/// Determine if an instruction references a machine register
|
|
/// and, if so, whether it reads or writes the register.
|
|
RegUsageState usesRegister(MachineOperand &p, MachineBasicBlock::iterator I);
|
|
|
|
/// Step backwards through a basic block, looking
|
|
/// for an instruction which writes a register within
|
|
/// a maximum of INSTR_DISTANCE_THRESHOLD instruction latency cycles.
|
|
MachineBasicBlock::iterator searchBackwards(MachineOperand &p,
|
|
MachineBasicBlock::iterator &I,
|
|
MachineBasicBlock &MBB);
|
|
|
|
/// if an instruction can be converted to an
|
|
/// equivalent LEA, insert the new instruction into the basic block
|
|
/// and return a pointer to it. Otherwise, return zero.
|
|
MachineInstr *postRAConvertToLEA(MachineBasicBlock &MBB,
|
|
MachineBasicBlock::iterator &MBBI) const;
|
|
|
|
public:
|
|
static char ID;
|
|
|
|
StringRef getPassName() const override { return FIXUPLEA_DESC; }
|
|
|
|
FixupLEAPass() : MachineFunctionPass(ID) { }
|
|
|
|
/// Loop over all of the basic blocks,
|
|
/// replacing instructions by equivalent LEA instructions
|
|
/// if needed and when possible.
|
|
bool runOnMachineFunction(MachineFunction &MF) override;
|
|
|
|
// This pass runs after regalloc and doesn't support VReg operands.
|
|
MachineFunctionProperties getRequiredProperties() const override {
|
|
return MachineFunctionProperties().set(
|
|
MachineFunctionProperties::Property::NoVRegs);
|
|
}
|
|
|
|
private:
|
|
TargetSchedModel TSM;
|
|
const X86InstrInfo *TII = nullptr;
|
|
const X86RegisterInfo *TRI = nullptr;
|
|
};
|
|
}
|
|
|
|
char FixupLEAPass::ID = 0;
|
|
|
|
INITIALIZE_PASS(FixupLEAPass, FIXUPLEA_NAME, FIXUPLEA_DESC, false, false)
|
|
|
|
MachineInstr *
|
|
FixupLEAPass::postRAConvertToLEA(MachineBasicBlock &MBB,
|
|
MachineBasicBlock::iterator &MBBI) const {
|
|
MachineInstr &MI = *MBBI;
|
|
switch (MI.getOpcode()) {
|
|
case X86::MOV32rr:
|
|
case X86::MOV64rr: {
|
|
const MachineOperand &Src = MI.getOperand(1);
|
|
const MachineOperand &Dest = MI.getOperand(0);
|
|
MachineInstr *NewMI =
|
|
BuildMI(MBB, MBBI, MI.getDebugLoc(),
|
|
TII->get(MI.getOpcode() == X86::MOV32rr ? X86::LEA32r
|
|
: X86::LEA64r))
|
|
.add(Dest)
|
|
.add(Src)
|
|
.addImm(1)
|
|
.addReg(0)
|
|
.addImm(0)
|
|
.addReg(0);
|
|
return NewMI;
|
|
}
|
|
}
|
|
|
|
if (!MI.isConvertibleTo3Addr())
|
|
return nullptr;
|
|
|
|
switch (MI.getOpcode()) {
|
|
default:
|
|
// Only convert instructions that we've verified are safe.
|
|
return nullptr;
|
|
case X86::ADD64ri32:
|
|
case X86::ADD64ri8:
|
|
case X86::ADD64ri32_DB:
|
|
case X86::ADD64ri8_DB:
|
|
case X86::ADD32ri:
|
|
case X86::ADD32ri8:
|
|
case X86::ADD32ri_DB:
|
|
case X86::ADD32ri8_DB:
|
|
if (!MI.getOperand(2).isImm()) {
|
|
// convertToThreeAddress will call getImm()
|
|
// which requires isImm() to be true
|
|
return nullptr;
|
|
}
|
|
break;
|
|
case X86::SHL64ri:
|
|
case X86::SHL32ri:
|
|
case X86::INC64r:
|
|
case X86::INC32r:
|
|
case X86::DEC64r:
|
|
case X86::DEC32r:
|
|
case X86::ADD64rr:
|
|
case X86::ADD64rr_DB:
|
|
case X86::ADD32rr:
|
|
case X86::ADD32rr_DB:
|
|
// These instructions are all fine to convert.
|
|
break;
|
|
}
|
|
MachineFunction::iterator MFI = MBB.getIterator();
|
|
return TII->convertToThreeAddress(MFI, MI, nullptr);
|
|
}
|
|
|
|
FunctionPass *llvm::createX86FixupLEAs() { return new FixupLEAPass(); }
|
|
|
|
static bool isLEA(unsigned Opcode) {
|
|
return Opcode == X86::LEA32r || Opcode == X86::LEA64r ||
|
|
Opcode == X86::LEA64_32r;
|
|
}
|
|
|
|
bool FixupLEAPass::runOnMachineFunction(MachineFunction &MF) {
|
|
if (skipFunction(MF.getFunction()))
|
|
return false;
|
|
|
|
const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>();
|
|
bool IsSlowLEA = ST.slowLEA();
|
|
bool IsSlow3OpsLEA = ST.slow3OpsLEA();
|
|
bool LEAUsesAG = ST.LEAusesAG();
|
|
|
|
bool OptIncDec = !ST.slowIncDec() || MF.getFunction().hasOptSize();
|
|
bool UseLEAForSP = ST.useLeaForSP();
|
|
|
|
TSM.init(&ST);
|
|
TII = ST.getInstrInfo();
|
|
TRI = ST.getRegisterInfo();
|
|
|
|
LLVM_DEBUG(dbgs() << "Start X86FixupLEAs\n";);
|
|
for (MachineBasicBlock &MBB : MF) {
|
|
// First pass. Try to remove or optimize existing LEAs.
|
|
for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ++I) {
|
|
if (!isLEA(I->getOpcode()))
|
|
continue;
|
|
|
|
if (optTwoAddrLEA(I, MBB, OptIncDec, UseLEAForSP))
|
|
continue;
|
|
|
|
if (IsSlowLEA)
|
|
processInstructionForSlowLEA(I, MBB);
|
|
else if (IsSlow3OpsLEA)
|
|
processInstrForSlow3OpLEA(I, MBB, OptIncDec);
|
|
}
|
|
|
|
// Second pass for creating LEAs. This may reverse some of the
|
|
// transformations above.
|
|
if (LEAUsesAG) {
|
|
for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ++I)
|
|
processInstruction(I, MBB);
|
|
}
|
|
}
|
|
|
|
LLVM_DEBUG(dbgs() << "End X86FixupLEAs\n";);
|
|
|
|
return true;
|
|
}
|
|
|
|
FixupLEAPass::RegUsageState
|
|
FixupLEAPass::usesRegister(MachineOperand &p, MachineBasicBlock::iterator I) {
|
|
RegUsageState RegUsage = RU_NotUsed;
|
|
MachineInstr &MI = *I;
|
|
|
|
for (unsigned i = 0; i < MI.getNumOperands(); ++i) {
|
|
MachineOperand &opnd = MI.getOperand(i);
|
|
if (opnd.isReg() && opnd.getReg() == p.getReg()) {
|
|
if (opnd.isDef())
|
|
return RU_Write;
|
|
RegUsage = RU_Read;
|
|
}
|
|
}
|
|
return RegUsage;
|
|
}
|
|
|
|
/// getPreviousInstr - Given a reference to an instruction in a basic
|
|
/// block, return a reference to the previous instruction in the block,
|
|
/// wrapping around to the last instruction of the block if the block
|
|
/// branches to itself.
|
|
static inline bool getPreviousInstr(MachineBasicBlock::iterator &I,
|
|
MachineBasicBlock &MBB) {
|
|
if (I == MBB.begin()) {
|
|
if (MBB.isPredecessor(&MBB)) {
|
|
I = --MBB.end();
|
|
return true;
|
|
} else
|
|
return false;
|
|
}
|
|
--I;
|
|
return true;
|
|
}
|
|
|
|
MachineBasicBlock::iterator
|
|
FixupLEAPass::searchBackwards(MachineOperand &p, MachineBasicBlock::iterator &I,
|
|
MachineBasicBlock &MBB) {
|
|
int InstrDistance = 1;
|
|
MachineBasicBlock::iterator CurInst;
|
|
static const int INSTR_DISTANCE_THRESHOLD = 5;
|
|
|
|
CurInst = I;
|
|
bool Found;
|
|
Found = getPreviousInstr(CurInst, MBB);
|
|
while (Found && I != CurInst) {
|
|
if (CurInst->isCall() || CurInst->isInlineAsm())
|
|
break;
|
|
if (InstrDistance > INSTR_DISTANCE_THRESHOLD)
|
|
break; // too far back to make a difference
|
|
if (usesRegister(p, CurInst) == RU_Write) {
|
|
return CurInst;
|
|
}
|
|
InstrDistance += TSM.computeInstrLatency(&*CurInst);
|
|
Found = getPreviousInstr(CurInst, MBB);
|
|
}
|
|
return MachineBasicBlock::iterator();
|
|
}
|
|
|
|
static inline bool isInefficientLEAReg(unsigned Reg) {
|
|
return Reg == X86::EBP || Reg == X86::RBP ||
|
|
Reg == X86::R13D || Reg == X86::R13;
|
|
}
|
|
|
|
/// Returns true if this LEA uses base an index registers, and the base register
|
|
/// is known to be inefficient for the subtarget.
|
|
// TODO: use a variant scheduling class to model the latency profile
|
|
// of LEA instructions, and implement this logic as a scheduling predicate.
|
|
static inline bool hasInefficientLEABaseReg(const MachineOperand &Base,
|
|
const MachineOperand &Index) {
|
|
return Base.isReg() && isInefficientLEAReg(Base.getReg()) && Index.isReg() &&
|
|
Index.getReg() != X86::NoRegister;
|
|
}
|
|
|
|
static inline bool hasLEAOffset(const MachineOperand &Offset) {
|
|
return (Offset.isImm() && Offset.getImm() != 0) || Offset.isGlobal();
|
|
}
|
|
|
|
static inline unsigned getADDrrFromLEA(unsigned LEAOpcode) {
|
|
switch (LEAOpcode) {
|
|
default:
|
|
llvm_unreachable("Unexpected LEA instruction");
|
|
case X86::LEA32r:
|
|
case X86::LEA64_32r:
|
|
return X86::ADD32rr;
|
|
case X86::LEA64r:
|
|
return X86::ADD64rr;
|
|
}
|
|
}
|
|
|
|
static inline unsigned getADDriFromLEA(unsigned LEAOpcode,
|
|
const MachineOperand &Offset) {
|
|
bool IsInt8 = Offset.isImm() && isInt<8>(Offset.getImm());
|
|
switch (LEAOpcode) {
|
|
default:
|
|
llvm_unreachable("Unexpected LEA instruction");
|
|
case X86::LEA32r:
|
|
case X86::LEA64_32r:
|
|
return IsInt8 ? X86::ADD32ri8 : X86::ADD32ri;
|
|
case X86::LEA64r:
|
|
return IsInt8 ? X86::ADD64ri8 : X86::ADD64ri32;
|
|
}
|
|
}
|
|
|
|
static inline unsigned getINCDECFromLEA(unsigned LEAOpcode, bool IsINC) {
|
|
switch (LEAOpcode) {
|
|
default:
|
|
llvm_unreachable("Unexpected LEA instruction");
|
|
case X86::LEA32r:
|
|
case X86::LEA64_32r:
|
|
return IsINC ? X86::INC32r : X86::DEC32r;
|
|
case X86::LEA64r:
|
|
return IsINC ? X86::INC64r : X86::DEC64r;
|
|
}
|
|
}
|
|
|
|
bool FixupLEAPass::optTwoAddrLEA(MachineBasicBlock::iterator &I,
|
|
MachineBasicBlock &MBB, bool OptIncDec,
|
|
bool UseLEAForSP) const {
|
|
MachineInstr &MI = *I;
|
|
|
|
const MachineOperand &Base = MI.getOperand(1 + X86::AddrBaseReg);
|
|
const MachineOperand &Scale = MI.getOperand(1 + X86::AddrScaleAmt);
|
|
const MachineOperand &Index = MI.getOperand(1 + X86::AddrIndexReg);
|
|
const MachineOperand &Disp = MI.getOperand(1 + X86::AddrDisp);
|
|
const MachineOperand &Segment = MI.getOperand(1 + X86::AddrSegmentReg);
|
|
|
|
if (Segment.getReg() != 0 || !Disp.isImm() || Scale.getImm() > 1 ||
|
|
!TII->isSafeToClobberEFLAGS(MBB, I))
|
|
return false;
|
|
|
|
Register DestReg = MI.getOperand(0).getReg();
|
|
Register BaseReg = Base.getReg();
|
|
Register IndexReg = Index.getReg();
|
|
|
|
// Don't change stack adjustment LEAs.
|
|
if (UseLEAForSP && (DestReg == X86::ESP || DestReg == X86::RSP))
|
|
return false;
|
|
|
|
// LEA64_32 has 64-bit operands but 32-bit result.
|
|
if (MI.getOpcode() == X86::LEA64_32r) {
|
|
if (BaseReg != 0)
|
|
BaseReg = TRI->getSubReg(BaseReg, X86::sub_32bit);
|
|
if (IndexReg != 0)
|
|
IndexReg = TRI->getSubReg(IndexReg, X86::sub_32bit);
|
|
}
|
|
|
|
MachineInstr *NewMI = nullptr;
|
|
|
|
// Look for lea(%reg1, %reg2), %reg1 or lea(%reg2, %reg1), %reg1
|
|
// which can be turned into add %reg2, %reg1
|
|
if (BaseReg != 0 && IndexReg != 0 && Disp.getImm() == 0 &&
|
|
(DestReg == BaseReg || DestReg == IndexReg)) {
|
|
unsigned NewOpcode = getADDrrFromLEA(MI.getOpcode());
|
|
if (DestReg != BaseReg)
|
|
std::swap(BaseReg, IndexReg);
|
|
|
|
if (MI.getOpcode() == X86::LEA64_32r) {
|
|
// TODO: Do we need the super register implicit use?
|
|
NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpcode), DestReg)
|
|
.addReg(BaseReg).addReg(IndexReg)
|
|
.addReg(Base.getReg(), RegState::Implicit)
|
|
.addReg(Index.getReg(), RegState::Implicit);
|
|
} else {
|
|
NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpcode), DestReg)
|
|
.addReg(BaseReg).addReg(IndexReg);
|
|
}
|
|
} else if (DestReg == BaseReg && IndexReg == 0) {
|
|
// This is an LEA with only a base register and a displacement,
|
|
// We can use ADDri or INC/DEC.
|
|
|
|
// Does this LEA have one these forms:
|
|
// lea %reg, 1(%reg)
|
|
// lea %reg, -1(%reg)
|
|
if (OptIncDec && (Disp.getImm() == 1 || Disp.getImm() == -1)) {
|
|
bool IsINC = Disp.getImm() == 1;
|
|
unsigned NewOpcode = getINCDECFromLEA(MI.getOpcode(), IsINC);
|
|
|
|
if (MI.getOpcode() == X86::LEA64_32r) {
|
|
// TODO: Do we need the super register implicit use?
|
|
NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpcode), DestReg)
|
|
.addReg(BaseReg).addReg(Base.getReg(), RegState::Implicit);
|
|
} else {
|
|
NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpcode), DestReg)
|
|
.addReg(BaseReg);
|
|
}
|
|
} else {
|
|
unsigned NewOpcode = getADDriFromLEA(MI.getOpcode(), Disp);
|
|
if (MI.getOpcode() == X86::LEA64_32r) {
|
|
// TODO: Do we need the super register implicit use?
|
|
NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpcode), DestReg)
|
|
.addReg(BaseReg).addImm(Disp.getImm())
|
|
.addReg(Base.getReg(), RegState::Implicit);
|
|
} else {
|
|
NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpcode), DestReg)
|
|
.addReg(BaseReg).addImm(Disp.getImm());
|
|
}
|
|
}
|
|
} else
|
|
return false;
|
|
|
|
MBB.erase(I);
|
|
I = NewMI;
|
|
return true;
|
|
}
|
|
|
|
void FixupLEAPass::processInstruction(MachineBasicBlock::iterator &I,
|
|
MachineBasicBlock &MBB) {
|
|
// Process a load, store, or LEA instruction.
|
|
MachineInstr &MI = *I;
|
|
const MCInstrDesc &Desc = MI.getDesc();
|
|
int AddrOffset = X86II::getMemoryOperandNo(Desc.TSFlags);
|
|
if (AddrOffset >= 0) {
|
|
AddrOffset += X86II::getOperandBias(Desc);
|
|
MachineOperand &p = MI.getOperand(AddrOffset + X86::AddrBaseReg);
|
|
if (p.isReg() && p.getReg() != X86::ESP) {
|
|
seekLEAFixup(p, I, MBB);
|
|
}
|
|
MachineOperand &q = MI.getOperand(AddrOffset + X86::AddrIndexReg);
|
|
if (q.isReg() && q.getReg() != X86::ESP) {
|
|
seekLEAFixup(q, I, MBB);
|
|
}
|
|
}
|
|
}
|
|
|
|
void FixupLEAPass::seekLEAFixup(MachineOperand &p,
|
|
MachineBasicBlock::iterator &I,
|
|
MachineBasicBlock &MBB) {
|
|
MachineBasicBlock::iterator MBI = searchBackwards(p, I, MBB);
|
|
if (MBI != MachineBasicBlock::iterator()) {
|
|
MachineInstr *NewMI = postRAConvertToLEA(MBB, MBI);
|
|
if (NewMI) {
|
|
++NumLEAs;
|
|
LLVM_DEBUG(dbgs() << "FixLEA: Candidate to replace:"; MBI->dump(););
|
|
// now to replace with an equivalent LEA...
|
|
LLVM_DEBUG(dbgs() << "FixLEA: Replaced by: "; NewMI->dump(););
|
|
MBB.erase(MBI);
|
|
MachineBasicBlock::iterator J =
|
|
static_cast<MachineBasicBlock::iterator>(NewMI);
|
|
processInstruction(J, MBB);
|
|
}
|
|
}
|
|
}
|
|
|
|
void FixupLEAPass::processInstructionForSlowLEA(MachineBasicBlock::iterator &I,
|
|
MachineBasicBlock &MBB) {
|
|
MachineInstr &MI = *I;
|
|
const unsigned Opcode = MI.getOpcode();
|
|
|
|
const MachineOperand &Dst = MI.getOperand(0);
|
|
const MachineOperand &Base = MI.getOperand(1 + X86::AddrBaseReg);
|
|
const MachineOperand &Scale = MI.getOperand(1 + X86::AddrScaleAmt);
|
|
const MachineOperand &Index = MI.getOperand(1 + X86::AddrIndexReg);
|
|
const MachineOperand &Offset = MI.getOperand(1 + X86::AddrDisp);
|
|
const MachineOperand &Segment = MI.getOperand(1 + X86::AddrSegmentReg);
|
|
|
|
if (Segment.getReg() != 0 || !Offset.isImm() ||
|
|
!TII->isSafeToClobberEFLAGS(MBB, I))
|
|
return;
|
|
const Register DstR = Dst.getReg();
|
|
const Register SrcR1 = Base.getReg();
|
|
const Register SrcR2 = Index.getReg();
|
|
if ((SrcR1 == 0 || SrcR1 != DstR) && (SrcR2 == 0 || SrcR2 != DstR))
|
|
return;
|
|
if (Scale.getImm() > 1)
|
|
return;
|
|
LLVM_DEBUG(dbgs() << "FixLEA: Candidate to replace:"; I->dump(););
|
|
LLVM_DEBUG(dbgs() << "FixLEA: Replaced by: ";);
|
|
MachineInstr *NewMI = nullptr;
|
|
// Make ADD instruction for two registers writing to LEA's destination
|
|
if (SrcR1 != 0 && SrcR2 != 0) {
|
|
const MCInstrDesc &ADDrr = TII->get(getADDrrFromLEA(Opcode));
|
|
const MachineOperand &Src = SrcR1 == DstR ? Index : Base;
|
|
NewMI =
|
|
BuildMI(MBB, I, MI.getDebugLoc(), ADDrr, DstR).addReg(DstR).add(Src);
|
|
LLVM_DEBUG(NewMI->dump(););
|
|
}
|
|
// Make ADD instruction for immediate
|
|
if (Offset.getImm() != 0) {
|
|
const MCInstrDesc &ADDri =
|
|
TII->get(getADDriFromLEA(Opcode, Offset));
|
|
const MachineOperand &SrcR = SrcR1 == DstR ? Base : Index;
|
|
NewMI = BuildMI(MBB, I, MI.getDebugLoc(), ADDri, DstR)
|
|
.add(SrcR)
|
|
.addImm(Offset.getImm());
|
|
LLVM_DEBUG(NewMI->dump(););
|
|
}
|
|
if (NewMI) {
|
|
MBB.erase(I);
|
|
I = NewMI;
|
|
}
|
|
}
|
|
|
|
void FixupLEAPass::processInstrForSlow3OpLEA(MachineBasicBlock::iterator &I,
|
|
MachineBasicBlock &MBB,
|
|
bool OptIncDec) {
|
|
MachineInstr &MI = *I;
|
|
const unsigned LEAOpcode = MI.getOpcode();
|
|
|
|
const MachineOperand &Dest = MI.getOperand(0);
|
|
const MachineOperand &Base = MI.getOperand(1 + X86::AddrBaseReg);
|
|
const MachineOperand &Scale = MI.getOperand(1 + X86::AddrScaleAmt);
|
|
const MachineOperand &Index = MI.getOperand(1 + X86::AddrIndexReg);
|
|
const MachineOperand &Offset = MI.getOperand(1 + X86::AddrDisp);
|
|
const MachineOperand &Segment = MI.getOperand(1 + X86::AddrSegmentReg);
|
|
|
|
if (!(TII->isThreeOperandsLEA(MI) || hasInefficientLEABaseReg(Base, Index)) ||
|
|
!TII->isSafeToClobberEFLAGS(MBB, MI) ||
|
|
Segment.getReg() != X86::NoRegister)
|
|
return;
|
|
|
|
Register DestReg = Dest.getReg();
|
|
Register BaseReg = Base.getReg();
|
|
Register IndexReg = Index.getReg();
|
|
|
|
if (MI.getOpcode() == X86::LEA64_32r) {
|
|
if (BaseReg != 0)
|
|
BaseReg = TRI->getSubReg(BaseReg, X86::sub_32bit);
|
|
if (IndexReg != 0)
|
|
IndexReg = TRI->getSubReg(IndexReg, X86::sub_32bit);
|
|
}
|
|
|
|
bool IsScale1 = Scale.getImm() == 1;
|
|
bool IsInefficientBase = isInefficientLEAReg(BaseReg);
|
|
bool IsInefficientIndex = isInefficientLEAReg(IndexReg);
|
|
|
|
// Skip these cases since it takes more than 2 instructions
|
|
// to replace the LEA instruction.
|
|
if (IsInefficientBase && DestReg == BaseReg && !IsScale1)
|
|
return;
|
|
|
|
LLVM_DEBUG(dbgs() << "FixLEA: Candidate to replace:"; MI.dump(););
|
|
LLVM_DEBUG(dbgs() << "FixLEA: Replaced by: ";);
|
|
|
|
MachineInstr *NewMI = nullptr;
|
|
|
|
// First try to replace LEA with one or two (for the 3-op LEA case)
|
|
// add instructions:
|
|
// 1.lea (%base,%index,1), %base => add %index,%base
|
|
// 2.lea (%base,%index,1), %index => add %base,%index
|
|
if (IsScale1 && (DestReg == BaseReg || DestReg == IndexReg)) {
|
|
unsigned NewOpc = getADDrrFromLEA(MI.getOpcode());
|
|
if (DestReg != BaseReg)
|
|
std::swap(BaseReg, IndexReg);
|
|
|
|
if (MI.getOpcode() == X86::LEA64_32r) {
|
|
// TODO: Do we need the super register implicit use?
|
|
NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpc), DestReg)
|
|
.addReg(BaseReg)
|
|
.addReg(IndexReg)
|
|
.addReg(Base.getReg(), RegState::Implicit)
|
|
.addReg(Index.getReg(), RegState::Implicit);
|
|
} else {
|
|
NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpc), DestReg)
|
|
.addReg(BaseReg)
|
|
.addReg(IndexReg);
|
|
}
|
|
} else if (!IsInefficientBase || (!IsInefficientIndex && IsScale1)) {
|
|
// If the base is inefficient try switching the index and base operands,
|
|
// otherwise just break the 3-Ops LEA inst into 2-Ops LEA + ADD instruction:
|
|
// lea offset(%base,%index,scale),%dst =>
|
|
// lea (%base,%index,scale); add offset,%dst
|
|
NewMI = BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(LEAOpcode))
|
|
.add(Dest)
|
|
.add(IsInefficientBase ? Index : Base)
|
|
.add(Scale)
|
|
.add(IsInefficientBase ? Base : Index)
|
|
.addImm(0)
|
|
.add(Segment);
|
|
LLVM_DEBUG(NewMI->dump(););
|
|
}
|
|
|
|
// If either replacement succeeded above, add the offset if needed, then
|
|
// replace the instruction.
|
|
if (NewMI) {
|
|
// Create ADD instruction for the Offset in case of 3-Ops LEA.
|
|
if (hasLEAOffset(Offset)) {
|
|
if (OptIncDec && Offset.isImm() &&
|
|
(Offset.getImm() == 1 || Offset.getImm() == -1)) {
|
|
unsigned NewOpc =
|
|
getINCDECFromLEA(MI.getOpcode(), Offset.getImm() == 1);
|
|
NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpc), DestReg)
|
|
.addReg(DestReg);
|
|
LLVM_DEBUG(NewMI->dump(););
|
|
} else {
|
|
unsigned NewOpc = getADDriFromLEA(MI.getOpcode(), Offset);
|
|
NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpc), DestReg)
|
|
.addReg(DestReg)
|
|
.add(Offset);
|
|
LLVM_DEBUG(NewMI->dump(););
|
|
}
|
|
}
|
|
|
|
MBB.erase(I);
|
|
I = NewMI;
|
|
return;
|
|
}
|
|
|
|
// Handle the rest of the cases with inefficient base register:
|
|
assert(DestReg != BaseReg && "DestReg == BaseReg should be handled already!");
|
|
assert(IsInefficientBase && "efficient base should be handled already!");
|
|
|
|
// FIXME: Handle LEA64_32r.
|
|
if (LEAOpcode == X86::LEA64_32r)
|
|
return;
|
|
|
|
// lea (%base,%index,1), %dst => mov %base,%dst; add %index,%dst
|
|
if (IsScale1 && !hasLEAOffset(Offset)) {
|
|
bool BIK = Base.isKill() && BaseReg != IndexReg;
|
|
TII->copyPhysReg(MBB, MI, MI.getDebugLoc(), DestReg, BaseReg, BIK);
|
|
LLVM_DEBUG(MI.getPrevNode()->dump(););
|
|
|
|
unsigned NewOpc = getADDrrFromLEA(MI.getOpcode());
|
|
NewMI = BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(NewOpc), DestReg)
|
|
.addReg(DestReg)
|
|
.add(Index);
|
|
LLVM_DEBUG(NewMI->dump(););
|
|
|
|
MBB.erase(I);
|
|
I = NewMI;
|
|
return;
|
|
}
|
|
|
|
// lea offset(%base,%index,scale), %dst =>
|
|
// lea offset( ,%index,scale), %dst; add %base,%dst
|
|
NewMI = BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(LEAOpcode))
|
|
.add(Dest)
|
|
.addReg(0)
|
|
.add(Scale)
|
|
.add(Index)
|
|
.add(Offset)
|
|
.add(Segment);
|
|
LLVM_DEBUG(NewMI->dump(););
|
|
|
|
unsigned NewOpc = getADDrrFromLEA(MI.getOpcode());
|
|
NewMI = BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(NewOpc), DestReg)
|
|
.addReg(DestReg)
|
|
.add(Base);
|
|
LLVM_DEBUG(NewMI->dump(););
|
|
|
|
MBB.erase(I);
|
|
I = NewMI;
|
|
}
|