forked from OSchip/llvm-project
LEA code size optimization pass (Part 2): Remove redundant LEA instructions.
Make x86 OptimizeLEAs pass remove LEA instruction if there is another LEA (in the same basic block) which calculates address differing only be a displacement. Works only for -Oz. Differential Revision: http://reviews.llvm.org/D13295 llvm-svn: 257589
This commit is contained in:
parent
253dbf5405
commit
1ce2c9973f
|
@ -54,7 +54,8 @@ FunctionPass *createX86PadShortFunctions();
|
|||
/// instructions, in order to eliminate execution delays in some processors.
|
||||
FunctionPass *createX86FixupLEAs();
|
||||
|
||||
/// Return a pass that removes redundant address recalculations.
|
||||
/// Return a pass that removes redundant LEA instructions and redundant address
|
||||
/// recalculations.
|
||||
FunctionPass *createX86OptimizeLEAs();
|
||||
|
||||
/// Return a pass that optimizes the code-size of x86 call sequences. This is
|
||||
|
|
|
@ -9,8 +9,10 @@
|
|||
//
|
||||
// This file defines the pass that performs some optimizations with LEA
|
||||
// instructions in order to improve code size.
|
||||
// Currently, it does one thing:
|
||||
// 1) Address calculations in load and store instructions are replaced by
|
||||
// Currently, it does two things:
|
||||
// 1) If there are two LEA instructions calculating addresses which only differ
|
||||
// by displacement inside a basic block, one of them is removed.
|
||||
// 2) Address calculations in load and store instructions are replaced by
|
||||
// existing LEA def registers where possible.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
@ -38,6 +40,7 @@ static cl::opt<bool> EnableX86LEAOpt("enable-x86-lea-opt", cl::Hidden,
|
|||
cl::init(false));
|
||||
|
||||
STATISTIC(NumSubstLEAs, "Number of LEA instruction substitutions");
|
||||
STATISTIC(NumRedundantLEAs, "Number of redundant LEA instructions removed");
|
||||
|
||||
namespace {
|
||||
class OptimizeLEAPass : public MachineFunctionPass {
|
||||
|
@ -71,6 +74,13 @@ private:
|
|||
/// \brief Returns true if the instruction is LEA.
|
||||
bool isLEA(const MachineInstr &MI);
|
||||
|
||||
/// \brief Returns true if the \p Last LEA instruction can be replaced by the
|
||||
/// \p First. The difference between displacements of the addresses calculated
|
||||
/// by these LEAs is returned in \p AddrDispShift. It'll be used for proper
|
||||
/// replacement of the \p Last LEA's uses with the \p First's def register.
|
||||
bool isReplaceable(const MachineInstr &First, const MachineInstr &Last,
|
||||
int64_t &AddrDispShift);
|
||||
|
||||
/// \brief Returns true if two instructions have memory operands that only
|
||||
/// differ by displacement. The numbers of the first memory operands for both
|
||||
/// instructions are specified through \p N1 and \p N2. The address
|
||||
|
@ -88,6 +98,9 @@ private:
|
|||
/// \brief Removes redundant address calculations.
|
||||
bool removeRedundantAddrCalc(const SmallVectorImpl<MachineInstr *> &List);
|
||||
|
||||
/// \brief Removes LEAs which calculate similar addresses.
|
||||
bool removeRedundantLEAs(SmallVectorImpl<MachineInstr *> &List);
|
||||
|
||||
DenseMap<const MachineInstr *, unsigned> InstrPos;
|
||||
|
||||
MachineRegisterInfo *MRI;
|
||||
|
@ -194,6 +207,69 @@ bool OptimizeLEAPass::isLEA(const MachineInstr &MI) {
|
|||
Opcode == X86::LEA64r || Opcode == X86::LEA64_32r;
|
||||
}
|
||||
|
||||
// Check that the Last LEA can be replaced by the First LEA. To be so,
|
||||
// these requirements must be met:
|
||||
// 1) Addresses calculated by LEAs differ only by displacement.
|
||||
// 2) Def registers of LEAs belong to the same class.
|
||||
// 3) All uses of the Last LEA def register are replaceable, thus the
|
||||
// register is used only as address base.
|
||||
bool OptimizeLEAPass::isReplaceable(const MachineInstr &First,
|
||||
const MachineInstr &Last,
|
||||
int64_t &AddrDispShift) {
|
||||
assert(isLEA(First) && isLEA(Last) &&
|
||||
"The function works only with LEA instructions");
|
||||
|
||||
// Compare instructions' memory operands.
|
||||
if (!isSimilarMemOp(Last, 1, First, 1, AddrDispShift))
|
||||
return false;
|
||||
|
||||
// Make sure that LEA def registers belong to the same class. There may be
|
||||
// instructions (like MOV8mr_NOREX) which allow a limited set of registers to
|
||||
// be used as their operands, so we must be sure that replacing one LEA
|
||||
// with another won't lead to putting a wrong register in the instruction.
|
||||
if (MRI->getRegClass(First.getOperand(0).getReg()) !=
|
||||
MRI->getRegClass(Last.getOperand(0).getReg()))
|
||||
return false;
|
||||
|
||||
// Loop over all uses of the Last LEA to check that its def register is
|
||||
// used only as address base for memory accesses. If so, it can be
|
||||
// replaced, otherwise - no.
|
||||
for (auto &MO : MRI->use_operands(Last.getOperand(0).getReg())) {
|
||||
MachineInstr &MI = *MO.getParent();
|
||||
|
||||
// Get the number of the first memory operand.
|
||||
const MCInstrDesc &Desc = MI.getDesc();
|
||||
int MemOpNo = X86II::getMemoryOperandNo(Desc.TSFlags, MI.getOpcode());
|
||||
|
||||
// If the use instruction has no memory operand - the LEA is not
|
||||
// replaceable.
|
||||
if (MemOpNo < 0)
|
||||
return false;
|
||||
|
||||
MemOpNo += X86II::getOperandBias(Desc);
|
||||
|
||||
// If the address base of the use instruction is not the LEA def register -
|
||||
// the LEA is not replaceable.
|
||||
if (!isIdenticalOp(MI.getOperand(MemOpNo + X86::AddrBaseReg), MO))
|
||||
return false;
|
||||
|
||||
// If the LEA def register is used as any other operand of the use
|
||||
// instruction - the LEA is not replaceable.
|
||||
for (unsigned i = 0; i < MI.getNumOperands(); i++)
|
||||
if (i != (unsigned)(MemOpNo + X86::AddrBaseReg) &&
|
||||
isIdenticalOp(MI.getOperand(i), MO))
|
||||
return false;
|
||||
|
||||
// Check that the new address displacement will fit 4 bytes.
|
||||
if (MI.getOperand(MemOpNo + X86::AddrDisp).isImm() &&
|
||||
!isInt<32>(MI.getOperand(MemOpNo + X86::AddrDisp).getImm() +
|
||||
AddrDispShift))
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// Check if MI1 and MI2 have memory operands which represent addresses that
|
||||
// differ only by displacement.
|
||||
bool OptimizeLEAPass::isSimilarMemOp(const MachineInstr &MI1, unsigned N1,
|
||||
|
@ -316,6 +392,81 @@ bool OptimizeLEAPass::removeRedundantAddrCalc(
|
|||
return Changed;
|
||||
}
|
||||
|
||||
// Try to find similar LEAs in the list and replace one with another.
|
||||
bool
|
||||
OptimizeLEAPass::removeRedundantLEAs(SmallVectorImpl<MachineInstr *> &List) {
|
||||
bool Changed = false;
|
||||
|
||||
// Loop over all LEA pairs.
|
||||
auto I1 = List.begin();
|
||||
while (I1 != List.end()) {
|
||||
MachineInstr &First = **I1;
|
||||
auto I2 = std::next(I1);
|
||||
while (I2 != List.end()) {
|
||||
MachineInstr &Last = **I2;
|
||||
int64_t AddrDispShift;
|
||||
|
||||
// LEAs should be in occurence order in the list, so we can freely
|
||||
// replace later LEAs with earlier ones.
|
||||
assert(calcInstrDist(First, Last) > 0 &&
|
||||
"LEAs must be in occurence order in the list");
|
||||
|
||||
// Check that the Last LEA instruction can be replaced by the First.
|
||||
if (!isReplaceable(First, Last, AddrDispShift)) {
|
||||
++I2;
|
||||
continue;
|
||||
}
|
||||
|
||||
// Loop over all uses of the Last LEA and update their operands. Note that
|
||||
// the correctness of this has already been checked in the isReplaceable
|
||||
// function.
|
||||
for (auto UI = MRI->use_begin(Last.getOperand(0).getReg()),
|
||||
UE = MRI->use_end();
|
||||
UI != UE;) {
|
||||
MachineOperand &MO = *UI++;
|
||||
MachineInstr &MI = *MO.getParent();
|
||||
|
||||
// Get the number of the first memory operand.
|
||||
const MCInstrDesc &Desc = MI.getDesc();
|
||||
int MemOpNo = X86II::getMemoryOperandNo(Desc.TSFlags, MI.getOpcode()) +
|
||||
X86II::getOperandBias(Desc);
|
||||
|
||||
// Update address base.
|
||||
MO.setReg(First.getOperand(0).getReg());
|
||||
|
||||
// Update address disp.
|
||||
MachineOperand *Op = &MI.getOperand(MemOpNo + X86::AddrDisp);
|
||||
if (Op->isImm())
|
||||
Op->setImm(Op->getImm() + AddrDispShift);
|
||||
else if (Op->isGlobal())
|
||||
Op->setOffset(Op->getOffset() + AddrDispShift);
|
||||
else
|
||||
llvm_unreachable("Invalid address displacement operand");
|
||||
}
|
||||
|
||||
// Since we can possibly extend register lifetime, clear kill flags.
|
||||
MRI->clearKillFlags(First.getOperand(0).getReg());
|
||||
|
||||
++NumRedundantLEAs;
|
||||
DEBUG(dbgs() << "OptimizeLEAs: Remove redundant LEA: "; Last.dump(););
|
||||
|
||||
// By this moment, all of the Last LEA's uses must be replaced. So we can
|
||||
// freely remove it.
|
||||
assert(MRI->use_empty(Last.getOperand(0).getReg()) &&
|
||||
"The LEA's def register must have no uses");
|
||||
Last.eraseFromParent();
|
||||
|
||||
// Erase removed LEA from the list.
|
||||
I2 = List.erase(I2);
|
||||
|
||||
Changed = true;
|
||||
}
|
||||
++I1;
|
||||
}
|
||||
|
||||
return Changed;
|
||||
}
|
||||
|
||||
bool OptimizeLEAPass::runOnMachineFunction(MachineFunction &MF) {
|
||||
bool Changed = false;
|
||||
|
||||
|
@ -339,6 +490,11 @@ bool OptimizeLEAPass::runOnMachineFunction(MachineFunction &MF) {
|
|||
if (LEAs.empty())
|
||||
continue;
|
||||
|
||||
// Remove redundant LEA instructions. The optimization may have a negative
|
||||
// effect on performance, so do it only for -Oz.
|
||||
if (MF.getFunction()->optForMinSize())
|
||||
Changed |= removeRedundantLEAs(LEAs);
|
||||
|
||||
// Remove redundant address calculations.
|
||||
Changed |= removeRedundantAddrCalc(LEAs);
|
||||
}
|
||||
|
|
|
@ -129,3 +129,41 @@ sw.epilog: ; preds = %sw.bb.2, %sw.bb.1,
|
|||
; CHECK: movl ${{[1-4]+}}, ([[REG2]])
|
||||
; CHECK: movl ${{[1-4]+}}, ([[REG3]])
|
||||
}
|
||||
|
||||
define void @test4(i64 %x) nounwind minsize {
|
||||
entry:
|
||||
%a = getelementptr inbounds [65 x %struct.anon1], [65 x %struct.anon1]* @arr1, i64 0, i64 %x, i32 0
|
||||
%tmp = load i32, i32* %a, align 4
|
||||
%b = getelementptr inbounds [65 x %struct.anon1], [65 x %struct.anon1]* @arr1, i64 0, i64 %x, i32 1
|
||||
%tmp1 = load i32, i32* %b, align 4
|
||||
%sub = sub i32 %tmp, %tmp1
|
||||
%c = getelementptr inbounds [65 x %struct.anon1], [65 x %struct.anon1]* @arr1, i64 0, i64 %x, i32 2
|
||||
%tmp2 = load i32, i32* %c, align 4
|
||||
%add = add nsw i32 %sub, %tmp2
|
||||
switch i32 %add, label %sw.epilog [
|
||||
i32 1, label %sw.bb.1
|
||||
i32 2, label %sw.bb.2
|
||||
]
|
||||
|
||||
sw.bb.1: ; preds = %entry
|
||||
store i32 111, i32* %b, align 4
|
||||
store i32 222, i32* %c, align 4
|
||||
br label %sw.epilog
|
||||
|
||||
sw.bb.2: ; preds = %entry
|
||||
store i32 333, i32* %b, align 4
|
||||
store i32 444, i32* %c, align 4
|
||||
br label %sw.epilog
|
||||
|
||||
sw.epilog: ; preds = %sw.bb.2, %sw.bb.1, %entry
|
||||
ret void
|
||||
; CHECK-LABEL: test4:
|
||||
; CHECK: leaq arr1+4({{.*}}), [[REG2:%[a-z]+]]
|
||||
; CHECK: movl -4([[REG2]]), {{.*}}
|
||||
; CHECK: subl ([[REG2]]), {{.*}}
|
||||
; CHECK: addl 4([[REG2]]), {{.*}}
|
||||
; CHECK: movl ${{[1-4]+}}, ([[REG2]])
|
||||
; CHECK: movl ${{[1-4]+}}, 4([[REG2]])
|
||||
; CHECK: movl ${{[1-4]+}}, ([[REG2]])
|
||||
; CHECK: movl ${{[1-4]+}}, 4([[REG2]])
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue