commit 9e658c974b
parent de9bafb162

Revert "[X86] Improvement in CodeGen instruction selection for LEAs."

This reverts r319543, due to ASan bot breakage.

llvm-svn: 319591
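
Background for the diff below (added for orientation; this note and its example are illustrative and not part of the original commit message): the reverted patch taught the X86 selector to fold more address arithmetic into LEA instructions, to factor LEAs that share operands, and to strength-reduce trivial LEAs into adds. An x86 LEA evaluates a complete addressing mode, Base + Index*Scale + Disp, in a single instruction, and the encoding only permits scales of 1, 2, 4, or 8, which is the invariant behind the isLegalScale() helper deleted below. A minimal AT&T-syntax sketch of the trade-off involved, with registers chosen arbitrarily rather than taken from the patch:

    # A single 3-operand LEA computes the whole address...
    leaq 8(%rdi,%rsi,4), %rax    # %rax = %rdi + 4*%rsi + 8

    # ...and is equivalent to this chain of simpler instructions, which
    # can be preferable on cores where 3-operand LEAs are slow (compare
    # the Subtarget->slow3OpsLEA() check in the reverted code below):
    movq %rsi, %rax
    shlq $2, %rax                # %rax = 4*%rsi
    addq %rdi, %rax              # %rax = %rdi + 4*%rsi
    addq $8, %rax                # %rax = %rdi + 4*%rsi + 8
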
@@ -1320,13 +1320,12 @@ public:
   /// Add all implicit def and use operands to this instruction.
   void addImplicitDefUseOperands(MachineFunction &MF);
 
+private:
   /// If this instruction is embedded into a MachineFunction, return the
   /// MachineRegisterInfo object for the current function, otherwise
   /// return null.
   MachineRegisterInfo *getRegInfo();
 
-private:
-
   /// Unlink all of the register operands in this instruction from their
   /// respective use lists. This requires that the operands already be on their
   /// use lists.
@@ -300,9 +300,6 @@ public:
   /// type legalization.
   bool NewNodesMustHaveLegalTypes = false;
 
-  /// Set to true for DAG of BasicBlock contained inside a loop.
-  bool IsDAGPartOfLoop = false;
-
 private:
   /// DAGUpdateListener is a friend so it can manipulate the listener stack.
   friend struct DAGUpdateListener;
@@ -27,7 +27,6 @@
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Analysis/BranchProbabilityInfo.h"
 #include "llvm/Analysis/CFG.h"
-#include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Analysis/OptimizationRemarkEmitter.h"
 #include "llvm/Analysis/TargetLibraryInfo.h"
 #include "llvm/CodeGen/FastISel.h"
@@ -326,8 +325,6 @@ void SelectionDAGISel::getAnalysisUsage(AnalysisUsage &AU) const {
   if (OptLevel != CodeGenOpt::None)
     AU.addRequired<AAResultsWrapperPass>();
   AU.addRequired<GCModuleInfo>();
-  if (OptLevel != CodeGenOpt::None)
-    AU.addRequired<LoopInfoWrapperPass>();
   AU.addRequired<StackProtector>();
   AU.addPreserved<StackProtector>();
   AU.addPreserved<GCModuleInfo>();
@@ -1418,7 +1415,6 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
 
   // Iterate over all basic blocks in the function.
   for (const BasicBlock *LLVMBB : RPOT) {
-    CurDAG->IsDAGPartOfLoop = false;
     if (OptLevel != CodeGenOpt::None) {
       bool AllPredsVisited = true;
       for (const_pred_iterator PI = pred_begin(LLVMBB), PE = pred_end(LLVMBB);
@@ -1596,13 +1592,6 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
                                      FunctionBasedInstrumentation);
     }
 
-    if (OptLevel != CodeGenOpt::None) {
-      auto &LIWP = getAnalysis<LoopInfoWrapperPass>();
-      LoopInfo &LI = LIWP.getLoopInfo();
-      if (LI.getLoopFor(LLVMBB))
-        CurDAG->IsDAGPartOfLoop = true;
-    }
-
     if (Begin != BI)
       ++NumDAGBlocks;
     else
@@ -88,11 +88,6 @@ namespace {
              IndexReg.getNode() != nullptr || Base_Reg.getNode() != nullptr;
     }
 
-    bool hasComplexAddressingMode() const {
-      return Disp && IndexReg.getNode() != nullptr &&
-             Base_Reg.getNode() != nullptr;
-    }
-
     /// Return true if this addressing mode is already RIP-relative.
     bool isRIPRelative() const {
       if (BaseType != RegBase) return false;
|
@ -102,10 +97,6 @@ namespace {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool isLegalScale() const {
|
|
||||||
return (Scale == 1 || Scale == 2 || Scale == 4 || Scale == 8);
|
|
||||||
}
|
|
||||||
|
|
||||||
void setBaseReg(SDValue Reg) {
|
void setBaseReg(SDValue Reg) {
|
||||||
BaseType = RegBase;
|
BaseType = RegBase;
|
||||||
Base_Reg = Reg;
|
Base_Reg = Reg;
|
||||||
|
@@ -171,13 +162,10 @@ namespace {
     /// If true, selector should try to optimize for minimum code size.
     bool OptForMinSize;
 
-    /// If true, selector should try to aggresively fold operands into AM.
-    bool OptForAggressingFolding;
-
   public:
     explicit X86DAGToDAGISel(X86TargetMachine &tm, CodeGenOpt::Level OptLevel)
         : SelectionDAGISel(tm, OptLevel), OptForSize(false),
-          OptForMinSize(false), OptForAggressingFolding(false) {}
+          OptForMinSize(false) {}
 
     StringRef getPassName() const override {
       return "X86 DAG->DAG Instruction Selection";
@@ -196,12 +184,6 @@ namespace {
 
     void PreprocessISelDAG() override;
 
-    void setAggressiveOperandFolding(bool val) {
-      OptForAggressingFolding = val;
-    }
-
-    bool getAggressiveOperandFolding() { return OptForAggressingFolding; }
-
     // Include the pieces autogenerated from the target description.
 #include "X86GenDAGISel.inc"
 
@@ -216,7 +198,6 @@ namespace {
     bool matchAdd(SDValue N, X86ISelAddressMode &AM, unsigned Depth);
     bool matchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
                                  unsigned Depth);
-    bool matchAddressLEA(SDValue N, X86ISelAddressMode &AM);
     bool matchAddressBase(SDValue N, X86ISelAddressMode &AM);
     bool selectAddr(SDNode *Parent, SDValue N, SDValue &Base,
                     SDValue &Scale, SDValue &Index, SDValue &Disp,
@@ -466,20 +447,6 @@ namespace {
 
     bool isMaskZeroExtended(SDNode *N) const;
   };
 
-  class X86AggressiveOperandFolding {
-  public:
-    explicit X86AggressiveOperandFolding(X86DAGToDAGISel &ISel, bool val)
-      : Selector(&ISel) {
-      Selector->setAggressiveOperandFolding(val);
-    }
-    ~X86AggressiveOperandFolding() {
-      Selector->setAggressiveOperandFolding(false);
-    }
-
-  private:
-    X86DAGToDAGISel *Selector;
-  };
-
 }
 
 
@@ -1235,14 +1202,8 @@ bool X86DAGToDAGISel::matchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
     dbgs() << "MatchAddress: ";
     AM.dump();
   });
-  // Limit recursion. For aggressive operand folding recurse
-  // till depth 8 which is the maximum legal scale value.
-  auto getMaxOperandFoldingDepth = [&] () -> unsigned int {
-    return getAggressiveOperandFolding() ? 8 : 5;
-  };
-
-  if (Depth > getMaxOperandFoldingDepth())
+  // Limit recursion.
+  if (Depth > 5)
     return matchAddressBase(N, AM);
 
   // If this is already a %rip relative address, we can only merge immediates
@@ -1533,20 +1494,6 @@ bool X86DAGToDAGISel::matchAddressBase(SDValue N, X86ISelAddressMode &AM) {
     return false;
   }
 
-  if (OptLevel != CodeGenOpt::None && getAggressiveOperandFolding() &&
-      AM.BaseType == X86ISelAddressMode::RegBase) {
-    if (AM.Base_Reg == N) {
-      SDValue Base_Reg = AM.Base_Reg;
-      AM.Base_Reg = AM.IndexReg;
-      AM.IndexReg = Base_Reg;
-      AM.Scale++;
-      return false;
-    } else if (AM.IndexReg == N) {
-      AM.Scale++;
-      return false;
-    }
-  }
-
   // Otherwise, we cannot select it.
   return true;
 }
@@ -1817,29 +1764,6 @@ bool X86DAGToDAGISel::selectLEA64_32Addr(SDValue N, SDValue &Base,
   return true;
 }
 
-bool X86DAGToDAGISel::matchAddressLEA(SDValue N, X86ISelAddressMode &AM) {
-  // Avoid enabling aggressive operand folding when node N is a part of loop.
-  X86AggressiveOperandFolding Enable(*this, !CurDAG->IsDAGPartOfLoop);
-
-  bool matchRes = matchAddress(N, AM);
-
-  // Check for legality of scale when recursion unwinds back to the top.
-  if (!matchRes) {
-    if (!AM.isLegalScale())
-      return true;
-
-    // Avoid creating costly complex LEAs having scale less than 2
-    // within loop.
-    if(CurDAG->IsDAGPartOfLoop && Subtarget->slow3OpsLEA() &&
-       AM.Scale <= 2 && AM.hasComplexAddressingMode() &&
-       (!AM.hasSymbolicDisplacement() && N.getOpcode() < ISD::BUILTIN_OP_END))
-      return true;
-  }
-
-  return matchRes;
-}
-
-
 /// Calls SelectAddr and determines if the maximal addressing
 /// mode it matches can be cost effectively emitted as an LEA instruction.
 bool X86DAGToDAGISel::selectLEAAddr(SDValue N,
@@ -1857,7 +1781,7 @@ bool X86DAGToDAGISel::selectLEAAddr(SDValue N,
   SDValue Copy = AM.Segment;
   SDValue T = CurDAG->getRegister(0, MVT::i32);
   AM.Segment = T;
-  if (matchAddressLEA(N, AM))
+  if (matchAddress(N, AM))
     return false;
   assert (T == AM.Segment);
   AM.Segment = Copy;
@@ -27,7 +27,6 @@
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
-#include "llvm/CodeGen/MachineDominators.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineInstr.h"
@@ -59,7 +58,6 @@ static cl::opt<bool>
                   cl::init(false));
 
 STATISTIC(NumSubstLEAs, "Number of LEA instruction substitutions");
-STATISTIC(NumFactoredLEAs, "Number of LEAs factorized");
 STATISTIC(NumRedundantLEAs, "Number of redundant LEA instructions removed");
 
 /// \brief Returns true if two machine operands are identical and they are not
@@ -67,10 +65,6 @@ STATISTIC(NumRedundantLEAs, "Number of redundant LEA instructions removed");
 static inline bool isIdenticalOp(const MachineOperand &MO1,
                                  const MachineOperand &MO2);
 
-/// \brief Returns true if two machine instructions have identical operands.
-static bool isIdenticalMI(MachineRegisterInfo *MRI, const MachineOperand &MO1,
-                          const MachineOperand &MO2);
-
 /// \brief Returns true if two address displacement operands are of the same
 /// type and use the same symbol/index/address regardless of the offset.
 static bool isSimilarDispOp(const MachineOperand &MO1,
@@ -79,9 +73,6 @@ static bool isSimilarDispOp(const MachineOperand &MO1,
 /// \brief Returns true if the instruction is LEA.
 static inline bool isLEA(const MachineInstr &MI);
 
-/// \brief Returns true if Definition of Operand is a copylike instruction.
-static bool isDefCopyLike(MachineRegisterInfo *MRI, const MachineOperand &Opr);
-
 namespace {
 
 /// A key based on instruction's memory operands.
@@ -89,35 +80,15 @@ class MemOpKey {
 public:
   MemOpKey(const MachineOperand *Base, const MachineOperand *Scale,
            const MachineOperand *Index, const MachineOperand *Segment,
-           const MachineOperand *Disp, bool DispCheck = false)
-      : Disp(Disp), DeepCheck(DispCheck) {
+           const MachineOperand *Disp)
+      : Disp(Disp) {
     Operands[0] = Base;
     Operands[1] = Scale;
    Operands[2] = Index;
    Operands[3] = Segment;
   }
 
-  /// Checks operands of MemOpKey are identical, if Base or Index
-  /// operand definitions are of kind SUBREG_TO_REG then compare
-  /// operands of defining MI.
-  bool performDeepCheck(const MemOpKey &Other) const {
-    MachineInstr *MI = const_cast<MachineInstr *>(Operands[0]->getParent());
-    MachineRegisterInfo *MRI = MI->getRegInfo();
-
-    for (int i = 0; i < 4; i++) {
-      bool CopyLike = isDefCopyLike(MRI, *Operands[i]);
-      if (CopyLike && !isIdenticalMI(MRI, *Operands[i], *Other.Operands[i]))
-        return false;
-      else if (!CopyLike && !isIdenticalOp(*Operands[i], *Other.Operands[i]))
-        return false;
-    }
-    return isIdenticalOp(*Disp, *Other.Disp);
-  }
-
   bool operator==(const MemOpKey &Other) const {
-    if (DeepCheck)
-      return performDeepCheck(Other);
-
     // Addresses' bases, scales, indices and segments must be identical.
     for (int i = 0; i < 4; ++i)
       if (!isIdenticalOp(*Operands[i], *Other.Operands[i]))
@@ -135,12 +106,6 @@ public:
 
   // Address' displacement operand.
   const MachineOperand *Disp;
-
-  // If true checks Address' base, index, segment and
-  // displacement are identical, in additions if base/index
-  // are defined by copylike instruction then futher
-  // compare the operands of the defining instruction.
-  bool DeepCheck;
 };
 
 } // end anonymous namespace
@@ -166,34 +131,12 @@ template <> struct DenseMapInfo<MemOpKey> {
   static unsigned getHashValue(const MemOpKey &Val) {
     // Checking any field of MemOpKey is enough to determine if the key is
     // empty or tombstone.
-    hash_code Hash(0);
     assert(Val.Disp != PtrInfo::getEmptyKey() && "Cannot hash the empty key");
     assert(Val.Disp != PtrInfo::getTombstoneKey() &&
            "Cannot hash the tombstone key");
 
-    auto getMIHash = [](MachineInstr *MI) -> hash_code {
-      hash_code h(0);
-      for (unsigned i = 1, e = MI->getNumOperands(); i < e; i++)
-        h = hash_combine(h, MI->getOperand(i));
-      return h;
-    };
-
-    const MachineOperand &Base = *Val.Operands[0];
-    const MachineOperand &Index = *Val.Operands[2];
-    MachineInstr *MI = const_cast<MachineInstr *>(Base.getParent());
-    MachineRegisterInfo *MRI = MI->getRegInfo();
-
-    if (isDefCopyLike(MRI, Base))
-      Hash = getMIHash(MRI->getVRegDef(Base.getReg()));
-    else
-      Hash = hash_combine(Hash, Base);
-
-    if (isDefCopyLike(MRI, Index))
-      Hash = getMIHash(MRI->getVRegDef(Index.getReg()));
-    else
-      Hash = hash_combine(Hash, Index);
-
-    Hash = hash_combine(Hash, *Val.Operands[1], *Val.Operands[3]);
+    hash_code Hash = hash_combine(*Val.Operands[0], *Val.Operands[1],
+                                  *Val.Operands[2], *Val.Operands[3]);
 
     // If the address displacement is an immediate, it should not affect the
     // hash so that memory operands which differ only be immediate displacement
@@ -253,16 +196,6 @@ static inline MemOpKey getMemOpKey(const MachineInstr &MI, unsigned N) {
                   &MI.getOperand(N + X86::AddrDisp));
 }
 
-static inline MemOpKey getMemOpCSEKey(const MachineInstr &MI, unsigned N) {
-  static MachineOperand DummyScale = MachineOperand::CreateImm(1);
-  assert((isLEA(MI) || MI.mayLoadOrStore()) &&
-         "The instruction must be a LEA, a load or a store");
-  return MemOpKey(&MI.getOperand(N + X86::AddrBaseReg), &DummyScale,
-                  &MI.getOperand(N + X86::AddrIndexReg),
-                  &MI.getOperand(N + X86::AddrSegmentReg),
-                  &MI.getOperand(N + X86::AddrDisp), true);
-}
-
 static inline bool isIdenticalOp(const MachineOperand &MO1,
                                  const MachineOperand &MO2) {
   return MO1.isIdenticalTo(MO2) &&
@@ -270,27 +203,6 @@ static inline bool isIdenticalOp(const MachineOperand &MO1,
           !TargetRegisterInfo::isPhysicalRegister(MO1.getReg()));
 }
 
-static bool isIdenticalMI(MachineRegisterInfo *MRI, const MachineOperand &MO1,
-                          const MachineOperand &MO2) {
-  MachineInstr *MI1 = nullptr;
-  MachineInstr *MI2 = nullptr;
-  if (!MO1.isReg() || !MO2.isReg())
-    return false;
-
-  MI1 = MRI->getVRegDef(MO1.getReg());
-  MI2 = MRI->getVRegDef(MO2.getReg());
-  if (!MI1 || !MI2)
-    return false;
-  if (MI1->getOpcode() != MI2->getOpcode())
-    return false;
-  if (MI1->getNumOperands() != MI2->getNumOperands())
-    return false;
-  for (unsigned i = 1, e = MI1->getNumOperands(); i < e; ++i)
-    if (!isIdenticalOp(MI1->getOperand(i), MI2->getOperand(i)))
-      return false;
-  return true;
-}
-
 #ifndef NDEBUG
 static bool isValidDispOp(const MachineOperand &MO) {
   return MO.isImm() || MO.isCPI() || MO.isJTI() || MO.isSymbol() ||
@@ -322,150 +234,8 @@ static inline bool isLEA(const MachineInstr &MI) {
          Opcode == X86::LEA64r || Opcode == X86::LEA64_32r;
 }
 
-static bool isDefCopyLike(MachineRegisterInfo *MRI, const MachineOperand &Opr) {
-  bool isInstrErased = !(Opr.isReg() && Opr.getParent()->getParent());
-  if (!Opr.isReg() || isInstrErased ||
-      TargetRegisterInfo::isPhysicalRegister(Opr.getReg()))
-    return false;
-  MachineInstr *MI = MRI->getVRegDef(Opr.getReg());
-  return MI && MI->isCopyLike();
-}
-
 namespace {
 
-/// This class captures the functions and attributes
-/// needed to factorize LEA within and across basic
-/// blocks.LEA instruction with same BASE,OFFSET and
-/// INDEX are the candidates for factorization.
-class FactorizeLEAOpt {
-public:
-  using LEAListT = std::list<MachineInstr *>;
-  using LEAMapT = DenseMap<MemOpKey, LEAListT>;
-  using ValueT = DenseMap<MemOpKey, unsigned>;
-  using ScopeEntryT = std::pair<MachineBasicBlock *, ValueT>;
-  using ScopeStackT = std::vector<ScopeEntryT>;
-
-  FactorizeLEAOpt() = default;
-  FactorizeLEAOpt(const FactorizeLEAOpt &) = delete;
-  FactorizeLEAOpt &operator=(const FactorizeLEAOpt &) = delete;
-
-  void performCleanup() {
-    for (auto LEA : removedLEAs)
-      LEA->eraseFromParent();
-    LEAs.clear();
-    Stack.clear();
-    removedLEAs.clear();
-  }
-
-  LEAMapT &getLEAMap() { return LEAs; }
-  ScopeEntryT *getTopScope() { return &Stack.back(); }
-
-  void addForLazyRemoval(MachineInstr *Instr) { removedLEAs.insert(Instr); }
-
-  bool checkIfScheduledForRemoval(MachineInstr *Instr) {
-    return removedLEAs.find(Instr) != removedLEAs.end();
-  }
-
-  /// Push the ScopeEntry for the BasicBlock over Stack.
-  /// Also traverses over list of instruction and update
-  /// LEAs Map and ScopeEntry for each LEA instruction
-  /// found using insertLEA().
-  void collectDataForBasicBlock(MachineBasicBlock *MBB);
-
-  /// Stores the size of MachineInstr list corrosponding
-  /// to key K from LEAs MAP into the ScopeEntry of
-  /// the basic block, then insert the LEA at the beginning
-  /// of the list.
-  void insertLEA(MachineInstr *MI);
-
-  /// Pops out ScopeEntry of top most BasicBlock from the stack
-  /// and remove the LEA instructions contained in the scope
-  /// from the LEAs Map.
-  void removeDataForBasicBlock();
-
-  /// If LEA contains Physical Registers then its not a candidate
-  /// for factorizations since physical registers may violate SSA
-  /// semantics of MI.
-  bool containsPhyReg(MachineInstr *MI, unsigned RecLevel);
-
-private:
-  ScopeStackT Stack;
-  LEAMapT LEAs;
-  std::set<MachineInstr *> removedLEAs;
-};
-
-void FactorizeLEAOpt::collectDataForBasicBlock(MachineBasicBlock *MBB) {
-  ValueT EmptyMap;
-  ScopeEntryT SE = std::make_pair(MBB, EmptyMap);
-  Stack.push_back(SE);
-  for (auto &MI : *MBB) {
-    if (isLEA(MI))
-      insertLEA(&MI);
-  }
-}
-
-void FactorizeLEAOpt::removeDataForBasicBlock() {
-  ScopeEntryT &SE = Stack.back();
-  for (auto MapEntry : SE.second) {
-    LEAMapT::iterator Itr = LEAs.find(MapEntry.first);
-    assert((Itr != LEAs.end()) &&
-           "LEAs map must have a node corresponding to ScopeEntry's Key.");
-
-    while (((*Itr).second.size() > MapEntry.second))
-      (*Itr).second.pop_front();
-    // If list goes empty remove entry from LEAs Map.
-    if ((*Itr).second.empty())
-      LEAs.erase(Itr);
-  }
-  Stack.pop_back();
-}
-
-bool FactorizeLEAOpt::containsPhyReg(MachineInstr *MI, unsigned RecLevel) {
-  if (!MI || !RecLevel)
-    return false;
-
-  MachineRegisterInfo *MRI = MI->getRegInfo();
-  for (auto Operand : MI->operands()) {
-    if (!Operand.isReg())
-      continue;
-    if (TargetRegisterInfo::isPhysicalRegister(Operand.getReg()))
-      return true;
-    MachineInstr *OperDefMI = MRI->getVRegDef(Operand.getReg());
-    if (OperDefMI && (MI != OperDefMI) && OperDefMI->isCopyLike() &&
-        containsPhyReg(OperDefMI, RecLevel - 1))
-      return true;
-  }
-  return false;
-}
-
-void FactorizeLEAOpt::insertLEA(MachineInstr *MI) {
-  unsigned lsize;
-  if (containsPhyReg(MI, 2))
-    return;
-
-  // Factorization is beneficial only for complex LEAs.
-  MachineOperand &Base = MI->getOperand(1);
-  MachineOperand &Index = MI->getOperand(3);
-  MachineOperand &Offset = MI->getOperand(4);
-  if ((Offset.isImm() && !Offset.getImm()) ||
-      (!Base.isReg() || !Base.getReg()) || (!Index.isReg() || !Index.getReg()))
-    return;
-
-  MemOpKey Key = getMemOpCSEKey(*MI, 1);
-  ScopeEntryT *TopScope = getTopScope();
-
-  LEAMapT::iterator Itr = LEAs.find(Key);
-  if (Itr == LEAs.end()) {
-    lsize = 0;
-    LEAs[Key].push_front(MI);
-  } else {
-    lsize = (*Itr).second.size();
-    (*Itr).second.push_front(MI);
-  }
-  if (TopScope->second.find(Key) == TopScope->second.end())
-    TopScope->second[Key] = lsize;
-}
-
 class OptimizeLEAPass : public MachineFunctionPass {
 public:
   OptimizeLEAPass() : MachineFunctionPass(ID) {}
@@ -477,12 +247,6 @@ public:
   /// been calculated by LEA. Also, remove redundant LEAs.
   bool runOnMachineFunction(MachineFunction &MF) override;
 
-  void getAnalysisUsage(AnalysisUsage &AU) const override {
-    AU.setPreservesCFG();
-    MachineFunctionPass::getAnalysisUsage(AU);
-    AU.addRequired<MachineDominatorTree>();
-  }
-
 private:
   using MemOpMap = DenseMap<MemOpKey, SmallVector<MachineInstr *, 16>>;
 
@@ -528,24 +292,8 @@ private:
   /// \brief Removes LEAs which calculate similar addresses.
   bool removeRedundantLEAs(MemOpMap &LEAs);
 
-  /// \brief Visit over basic blocks, collect LEAs in a scoped
-  /// hash map (FactorizeLEAOpt::LEAs) and try to factor them out.
-  bool FactorizeLEAsAllBasicBlocks(MachineFunction &MF);
-
-  bool FactorizeLEAsBasicBlock(MachineDomTreeNode *DN);
-
-  /// \brief Factor out LEAs which share Base,Index,Offset and Segment.
-  bool processBasicBlock(const MachineBasicBlock &MBB);
-
-  /// \brief Try to replace LEA with a lower strength instruction
-  /// to improves latency and throughput.
-  bool strengthReduceLEAs(MemOpMap &LEAs, const MachineBasicBlock &MBB);
-
   DenseMap<const MachineInstr *, unsigned> InstrPos;
 
-  FactorizeLEAOpt FactorOpt;
-
-  MachineDominatorTree *DT;
   MachineRegisterInfo *MRI;
   const X86InstrInfo *TII;
   const X86RegisterInfo *TRI;
@@ -741,9 +489,7 @@ void OptimizeLEAPass::findLEAs(const MachineBasicBlock &MBB, MemOpMap &LEAs) {
 bool OptimizeLEAPass::removeRedundantAddrCalc(MemOpMap &LEAs) {
   bool Changed = false;
 
-  if (LEAs.empty())
-    return Changed;
+  assert(!LEAs.empty());
 
   MachineBasicBlock *MBB = (*LEAs.begin()->second.begin())->getParent();
 
   // Process all instructions in basic block.
@@ -913,10 +659,6 @@ bool OptimizeLEAPass::removeRedundantLEAs(MemOpMap &LEAs) {
         // Erase removed LEA from the list.
         I2 = List.erase(I2);
 
-        // If List becomes empty remove it from LEAs map.
-        if (List.empty())
-          LEAs.erase(E.first);
-
         Changed = true;
       }
       ++I1;
@@ -926,217 +668,6 @@ bool OptimizeLEAPass::removeRedundantLEAs(MemOpMap &LEAs) {
   return Changed;
 }
 
-static inline int getADDrrFromLEA(int LEAOpcode) {
-  switch (LEAOpcode) {
-  default:
-    llvm_unreachable("Unexpected LEA instruction");
-  case X86::LEA16r:
-    return X86::ADD16rr;
-  case X86::LEA32r:
-    return X86::ADD32rr;
-  case X86::LEA64_32r:
-  case X86::LEA64r:
-    return X86::ADD64rr;
-  }
-}
-
-bool OptimizeLEAPass::strengthReduceLEAs(MemOpMap &LEAs,
-                                         const MachineBasicBlock &BB) {
-  bool Changed = false;
-
-  // Loop over all entries in the table.
-  for (auto &E : LEAs) {
-    auto &List = E.second;
-
-    // Loop over all LEA pairs.
-    auto I1 = List.begin();
-    while (I1 != List.end()) {
-      MachineInstrBuilder NewMI;
-      MachineInstr &First = **I1;
-      MachineOperand &Res = First.getOperand(0);
-      MachineOperand &Base = First.getOperand(1);
-      MachineOperand &Scale = First.getOperand(2);
-      MachineOperand &Index = First.getOperand(3);
-      MachineOperand &Offset = First.getOperand(4);
-
-      const MCInstrDesc &ADDrr = TII->get(getADDrrFromLEA(First.getOpcode()));
-      const DebugLoc DL = First.getDebugLoc();
-
-      if (!Base.isReg() || !Index.isReg() || !Index.getReg()) {
-        I1++;
-        continue;
-      }
-
-      if (TargetRegisterInfo::isPhysicalRegister(Res.getReg()) ||
-          TargetRegisterInfo::isPhysicalRegister(Base.getReg()) ||
-          TargetRegisterInfo::isPhysicalRegister(Index.getReg())) {
-        I1++;
-        continue;
-      }
-
-      // Check for register class compatibility between Result and
-      // Index operands.
-      const TargetRegisterClass *ResRC = MRI->getRegClass(Res.getReg());
-      const TargetRegisterClass *IndexRC = MRI->getRegClass(Index.getReg());
-      if (TRI->getRegSizeInBits(*ResRC) != TRI->getRegSizeInBits(*IndexRC)) {
-        I1++;
-        continue;
-      }
-
-      MachineBasicBlock &MBB = *(const_cast<MachineBasicBlock *>(&BB));
-      // R = B + I
-      if (Scale.isImm() && Scale.getImm() == 1 && Offset.isImm() &&
-          !Offset.getImm()) {
-        NewMI = BuildMI(MBB, &First, DL, ADDrr)
-                    .addDef(Res.getReg())
-                    .addUse(Base.getReg())
-                    .addUse(Index.getReg());
-        Changed = NewMI.getInstr() != nullptr;
-        First.eraseFromParent();
-        I1 = List.erase(I1);
-
-        // If List becomes empty remove it from LEAs map.
-        if (List.empty())
-          LEAs.erase(E.first);
-      } else
-        I1++;
-    }
-  }
-  return Changed;
-}
-
-bool OptimizeLEAPass::processBasicBlock(const MachineBasicBlock &MBB) {
-  bool cseDone = false;
-
-  // Legal scale value (1,2,4 & 8) vector.
-  auto LegalScale = [](int scale) {
-    return scale == 1 || scale == 2 || scale == 4 || scale == 8;
-  };
-
-  auto CompareFn = [](const MachineInstr *Arg1,
-                      const MachineInstr *Arg2) -> bool {
-    return Arg1->getOperand(2).getImm() >= Arg2->getOperand(2).getImm();
-  };
-
-  // Loop over all entries in the table.
-  for (auto &E : FactorOpt.getLEAMap()) {
-    auto &List = E.second;
-    if (List.size() > 1)
-      List.sort(CompareFn);
-
-    // Loop over all LEA pairs.
-    for (auto Iter1 = List.begin(); Iter1 != List.end(); Iter1++) {
-      for (auto Iter2 = std::next(Iter1); Iter2 != List.end(); Iter2++) {
-        MachineInstr &LI1 = **Iter1;
-        MachineInstr &LI2 = **Iter2;
-
-        if (!DT->dominates(&LI2, &LI1))
-          continue;
-
-        int Scale1 = LI1.getOperand(2).getImm();
-        int Scale2 = LI2.getOperand(2).getImm();
-        assert(LI2.getOperand(0).isReg() && "Result is a VirtualReg");
-        DebugLoc DL = LI1.getDebugLoc();
-
-        // Continue if instruction has already been factorized.
-        if (FactorOpt.checkIfScheduledForRemoval(&LI1))
-          continue;
-
-        int Factor = Scale1 - Scale2;
-        if (Factor > 0 && LegalScale(Factor)) {
-          MachineOperand NewBase = LI2.getOperand(0);
-          MachineOperand NewIndex = LI1.getOperand(3);
-
-          const TargetRegisterClass *LI2ResRC =
-              MRI->getRegClass(LI2.getOperand(0).getReg());
-          const TargetRegisterClass *LI1BaseRC =
-              MRI->getRegClass(LI1.getOperand(1).getReg());
-
-          if (TRI->getRegSizeInBits(*LI1BaseRC) >
-              TRI->getRegSizeInBits(*LI2ResRC)) {
-            MachineInstr *LI1IndexDef =
-                MRI->getVRegDef(LI1.getOperand(3).getReg());
-            if (LI1IndexDef->getOpcode() != TargetOpcode::SUBREG_TO_REG)
-              continue;
-            MachineOperand &SubReg = LI1IndexDef->getOperand(2);
-            const TargetRegisterClass *SubRegRC =
-                MRI->getRegClass(SubReg.getReg());
-            if (TRI->getRegSizeInBits(*SubRegRC) !=
-                TRI->getRegSizeInBits(*LI2ResRC))
-              continue;
-            NewIndex = SubReg;
-          }
-
-          DEBUG(dbgs() << "CSE LEAs: Candidate to replace: "; LI1.dump(););
-          MachineInstrBuilder NewMI =
-              BuildMI(*(const_cast<MachineBasicBlock *>(&MBB)), &LI1, DL,
-                      TII->get(LI1.getOpcode()))
-                  .addDef(LI1.getOperand(0).getReg()) // Dst = Dst of LI1.
-                  .addUse(NewBase.getReg())           // Base = Dst to LI2.
-                  .addImm(Factor)                     // Scale = Diff b/w scales.
-                  .addUse(NewIndex.getReg())          // Index = Index of LI1.
-                  .addImm(0)                          // Disp = 0
-                  .addUse(
-                      LI1.getOperand(5).getReg());    // Segment = Segmant of LI1.
-
-          cseDone = NewMI.getInstr() != nullptr;
-
-          /// To preserve the SSA nature we need to remove Def flag
-          /// from old result.
-          LI1.getOperand(0).setIsDef(false);
-
-          /// Lazy removal shall ensure that replaced LEA remains
-          /// till we finish processing all the basic block. This shall
-          /// provide opportunity for further factorization based on
-          /// the replaced LEA which will be legal since it has same
-          /// destination as newly formed LEA.
-          FactorOpt.addForLazyRemoval(&LI1);
-
-          NumFactoredLEAs++;
-          DEBUG(dbgs() << "CSE LEAs: Replaced by: "; NewMI->dump(););
-        }
-      }
-    }
-  }
-  return cseDone;
-}
-
-bool OptimizeLEAPass::FactorizeLEAsBasicBlock(MachineDomTreeNode *DN) {
-  bool Changed = false;
-  using StackT = SmallVector<MachineDomTreeNode *, 16>;
-  using VisitedNodeMapT = SmallSet<MachineDomTreeNode *, 16>;
-
-  StackT WorkList;
-  VisitedNodeMapT DomNodeMap;
-
-  WorkList.push_back(DN);
-  while (!WorkList.empty()) {
-    MachineDomTreeNode *MDN = WorkList.back();
-    FactorOpt.collectDataForBasicBlock(MDN->getBlock());
-    Changed |= processBasicBlock(*MDN->getBlock());
-
-    if (DomNodeMap.find(MDN) == DomNodeMap.end()) {
-      DomNodeMap.insert(MDN);
-      for (auto Child : MDN->getChildren())
-        WorkList.push_back(Child);
-    }
-
-    MachineDomTreeNode *TDM = WorkList.back();
-    if (MDN->getLevel() == TDM->getLevel()) {
-      FactorOpt.removeDataForBasicBlock();
-      DomNodeMap.erase(MDN);
-      WorkList.pop_back();
-    }
-  }
-  return Changed;
-}
-
-bool OptimizeLEAPass::FactorizeLEAsAllBasicBlocks(MachineFunction &MF) {
-  bool Changed = FactorizeLEAsBasicBlock(DT->getRootNode());
-  FactorOpt.performCleanup();
-  return Changed;
-}
-
 bool OptimizeLEAPass::runOnMachineFunction(MachineFunction &MF) {
   bool Changed = false;
 
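
The FactorizeLEAOpt machinery deleted just above rewrote an LEA in terms of a dominating LEA that shares the same base, index, and displacement, keeping only the scale difference (Factor = Scale1 - Scale2) as the new scale. A worked sketch of that rewrite, using the same shapes that reappear in the lea-opt test updates further down (registers are illustrative):

    leal 4(%rdi,%rsi,2), %ecx    # LI2: %ecx = %rdi + 2*%rsi + 4
    leal 4(%rdi,%rsi,4), %eax    # LI1: %eax = %rdi + 4*%rsi + 4

    # Factor = 4 - 2 = 2 is itself a legal scale, so LI1 could be
    # re-expressed on top of LI2's result, with no displacement:
    leal (%ecx,%esi,2), %eax     # %eax = %ecx + 2*%esi
                                 #      = %rdi + 4*%rsi + 4

With the revert, the tests below go back to expecting two independent three-operand LEAs.
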
@@ -1146,10 +677,6 @@ bool OptimizeLEAPass::runOnMachineFunction(MachineFunction &MF) {
   MRI = &MF.getRegInfo();
   TII = MF.getSubtarget<X86Subtarget>().getInstrInfo();
   TRI = MF.getSubtarget<X86Subtarget>().getRegisterInfo();
-  DT = &getAnalysis<MachineDominatorTree>();
-
-  // Attempt factorizing LEAs.
-  Changed |= FactorizeLEAsAllBasicBlocks(MF);
 
   // Process all basic blocks.
   for (auto &MBB : MF) {
@@ -1166,9 +693,6 @@ bool OptimizeLEAPass::runOnMachineFunction(MachineFunction &MF) {
     // Remove redundant LEA instructions.
     Changed |= removeRedundantLEAs(LEAs);
 
-    // Strength reduce LEA instructions.
-    Changed |= strengthReduceLEAs(LEAs, MBB);
-
     // Remove redundant address calculations. Do it only for -Os/-Oz since only
     // a code size gain is expected from this part of the pass.
     if (MF.getFunction()->optForSize())
@@ -388,7 +388,7 @@ define void @test_variadic_call_2(i8** %addr_ptr, double* %val_ptr) {
 ; X32-NEXT: movl 4(%ecx), %ecx
 ; X32-NEXT: movl %eax, (%esp)
 ; X32-NEXT: movl $4, %eax
-; X32-NEXT: addl %esp, %eax
+; X32-NEXT: leal (%esp,%eax), %eax
 ; X32-NEXT: movl %edx, 4(%esp)
 ; X32-NEXT: movl %ecx, 4(%eax)
 ; X32-NEXT: calll variadic_callee
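
The test change above undoes the LEA-to-ADD strength reduction performed by the deleted strengthReduceLEAs(): an LEA whose scale is 1 and whose displacement is 0 computes nothing more than the sum of two registers, so the patch rewrote it with the ADDrr opcode chosen by getADDrrFromLEA(). A minimal sketch of the equivalence, taken from the hunk above (in AT&T syntax the two-operand add accumulates into its destination):

    leal (%esp,%eax), %eax    # %eax = %esp + %eax  (scale 1, disp 0)
    addl %esp, %eax           # same result, because %eax is both an
                              # operand and the destination
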
@@ -5,10 +5,10 @@
 define i32* @test_gep_i8(i32 *%arr, i8 %ind) {
 ; X64_GISEL-LABEL: test_gep_i8:
 ; X64_GISEL: # BB#0:
-; X64_GISEL-NEXT: movq $4, %rcx
-; X64_GISEL-NEXT: movsbq %sil, %rax
-; X64_GISEL-NEXT: imulq %rcx, %rax
-; X64_GISEL-NEXT: addq %rdi, %rax
+; X64_GISEL-NEXT: movq $4, %rax
+; X64_GISEL-NEXT: movsbq %sil, %rcx
+; X64_GISEL-NEXT: imulq %rax, %rcx
+; X64_GISEL-NEXT: leaq (%rdi,%rcx), %rax
 ; X64_GISEL-NEXT: retq
 ;
 ; X64-LABEL: test_gep_i8:
@@ -25,7 +25,7 @@ define i32* @test_gep_i8_const(i32 *%arr) {
 ; X64_GISEL-LABEL: test_gep_i8_const:
 ; X64_GISEL: # BB#0:
 ; X64_GISEL-NEXT: movq $80, %rax
-; X64_GISEL-NEXT: addq %rdi, %rax
+; X64_GISEL-NEXT: leaq (%rdi,%rax), %rax
 ; X64_GISEL-NEXT: retq
 ;
 ; X64-LABEL: test_gep_i8_const:
@@ -39,10 +39,10 @@
 define i32* @test_gep_i16(i32 *%arr, i16 %ind) {
 ; X64_GISEL-LABEL: test_gep_i16:
 ; X64_GISEL: # BB#0:
-; X64_GISEL-NEXT: movq $4, %rcx
-; X64_GISEL-NEXT: movswq %si, %rax
-; X64_GISEL-NEXT: imulq %rcx, %rax
-; X64_GISEL-NEXT: addq %rdi, %rax
+; X64_GISEL-NEXT: movq $4, %rax
+; X64_GISEL-NEXT: movswq %si, %rcx
+; X64_GISEL-NEXT: imulq %rax, %rcx
+; X64_GISEL-NEXT: leaq (%rdi,%rcx), %rax
 ; X64_GISEL-NEXT: retq
 ;
 ; X64-LABEL: test_gep_i16:
@@ -59,7 +59,7 @@ define i32* @test_gep_i16_const(i32 *%arr) {
 ; X64_GISEL-LABEL: test_gep_i16_const:
 ; X64_GISEL: # BB#0:
 ; X64_GISEL-NEXT: movq $80, %rax
-; X64_GISEL-NEXT: addq %rdi, %rax
+; X64_GISEL-NEXT: leaq (%rdi,%rax), %rax
 ; X64_GISEL-NEXT: retq
 ;
 ; X64-LABEL: test_gep_i16_const:
@@ -73,10 +73,10 @@
 define i32* @test_gep_i32(i32 *%arr, i32 %ind) {
 ; X64_GISEL-LABEL: test_gep_i32:
 ; X64_GISEL: # BB#0:
-; X64_GISEL-NEXT: movq $4, %rcx
-; X64_GISEL-NEXT: movslq %esi, %rax
-; X64_GISEL-NEXT: imulq %rcx, %rax
-; X64_GISEL-NEXT: addq %rdi, %rax
+; X64_GISEL-NEXT: movq $4, %rax
+; X64_GISEL-NEXT: movslq %esi, %rcx
+; X64_GISEL-NEXT: imulq %rax, %rcx
+; X64_GISEL-NEXT: leaq (%rdi,%rcx), %rax
 ; X64_GISEL-NEXT: retq
 ;
 ; X64-LABEL: test_gep_i32:
@@ -92,7 +92,7 @@ define i32* @test_gep_i32_const(i32 *%arr) {
 ; X64_GISEL-LABEL: test_gep_i32_const:
 ; X64_GISEL: # BB#0:
 ; X64_GISEL-NEXT: movq $20, %rax
-; X64_GISEL-NEXT: addq %rdi, %rax
+; X64_GISEL-NEXT: leaq (%rdi,%rax), %rax
 ; X64_GISEL-NEXT: retq
 ;
 ; X64-LABEL: test_gep_i32_const:
@@ -108,7 +108,7 @@ define i32* @test_gep_i64(i32 *%arr, i64 %ind) {
 ; X64_GISEL: # BB#0:
 ; X64_GISEL-NEXT: movq $4, %rax
 ; X64_GISEL-NEXT: imulq %rsi, %rax
-; X64_GISEL-NEXT: addq %rdi, %rax
+; X64_GISEL-NEXT: leaq (%rdi,%rax), %rax
 ; X64_GISEL-NEXT: retq
 ;
 ; X64-LABEL: test_gep_i64:
@@ -123,7 +123,7 @@ define i32* @test_gep_i64_const(i32 *%arr) {
 ; X64_GISEL-LABEL: test_gep_i64_const:
 ; X64_GISEL: # BB#0:
 ; X64_GISEL-NEXT: movq $20, %rax
-; X64_GISEL-NEXT: addq %rdi, %rax
+; X64_GISEL-NEXT: leaq (%rdi,%rax), %rax
 ; X64_GISEL-NEXT: retq
 ;
 ; X64-LABEL: test_gep_i64_const:
@@ -181,7 +181,7 @@ define i32 @test_gep_folding_largeGepIndex(i32* %arr, i32 %val) {
 ; ALL-LABEL: test_gep_folding_largeGepIndex:
 ; ALL: # BB#0:
 ; ALL-NEXT: movabsq $228719476720, %rax # imm = 0x3540BE3FF0
-; ALL-NEXT: addq %rdi, %rax
+; ALL-NEXT: leaq (%rdi,%rax), %rax
 ; ALL-NEXT: movl %esi, (%rax)
 ; ALL-NEXT: movl (%rax), %eax
 ; ALL-NEXT: retq
@@ -9,21 +9,27 @@ define void @test_func(%struct.SA* nocapture %ctx, i32 %n) local_unnamed_addr {
 ; X64: # BB#0: # %entry
 ; X64-NEXT: movl (%rdi), %eax
 ; X64-NEXT: movl 16(%rdi), %ecx
+; X64-NEXT: leal (%rax,%rcx), %edx
 ; X64-NEXT: leal 1(%rax,%rcx), %eax
 ; X64-NEXT: movl %eax, 12(%rdi)
-; X64-NEXT: addl %ecx, %eax
+; X64-NEXT: leal 1(%rcx,%rdx), %eax
 ; X64-NEXT: movl %eax, 16(%rdi)
 ; X64-NEXT: retq
 ;
 ; X86-LABEL: test_func:
 ; X86: # BB#0: # %entry
+; X86-NEXT: pushl %esi
+; X86-NEXT: .cfi_def_cfa_offset 8
+; X86-NEXT: .cfi_offset %esi, -8
 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT: movl (%eax), %ecx
 ; X86-NEXT: movl 16(%eax), %edx
-; X86-NEXT: leal 1(%ecx,%edx), %ecx
-; X86-NEXT: movl %ecx, 12(%eax)
+; X86-NEXT: leal 1(%ecx,%edx), %esi
 ; X86-NEXT: addl %edx, %ecx
+; X86-NEXT: movl %esi, 12(%eax)
+; X86-NEXT: leal 1(%edx,%ecx), %ecx
 ; X86-NEXT: movl %ecx, 16(%eax)
+; X86-NEXT: popl %esi
 ; X86-NEXT: retl
 entry:
   %h0 = getelementptr inbounds %struct.SA, %struct.SA* %ctx, i64 0, i32 0
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+slow-3ops-lea | FileCheck %s -check-prefix=X64
-; RUN: llc < %s -mtriple=i686-unknown -mattr=+slow-3ops-lea | FileCheck %s -check-prefix=X86
+; RUN: llc < %s -mtriple=x86_64-unknown | FileCheck %s -check-prefix=X64
+; RUN: llc < %s -mtriple=i686-unknown | FileCheck %s -check-prefix=X86
 
 %struct.SA = type { i32 , i32 , i32 , i32 , i32};
 
@@ -10,39 +10,43 @@ define void @foo(%struct.SA* nocapture %ctx, i32 %n) local_unnamed_addr #0 {
 ; X64-NEXT: .p2align 4, 0x90
 ; X64-NEXT: .LBB0_1: # %loop
 ; X64-NEXT: # =>This Inner Loop Header: Depth=1
-; X64-NEXT: movl 16(%rdi), %eax
-; X64-NEXT: movl (%rdi), %ecx
-; X64-NEXT: addl %eax, %ecx
-; X64-NEXT: incl %ecx
-; X64-NEXT: movl %ecx, 12(%rdi)
+; X64-NEXT: movl (%rdi), %eax
+; X64-NEXT: movl 16(%rdi), %ecx
+; X64-NEXT: leal 1(%rax,%rcx), %edx
+; X64-NEXT: movl %edx, 12(%rdi)
 ; X64-NEXT: decl %esi
 ; X64-NEXT: jne .LBB0_1
 ; X64-NEXT: # BB#2: # %exit
-; X64-NEXT: addl %eax, %ecx
-; X64-NEXT: movl %ecx, 16(%rdi)
+; X64-NEXT: addl %ecx, %eax
+; X64-NEXT: leal 1(%rcx,%rax), %eax
+; X64-NEXT: movl %eax, 16(%rdi)
 ; X64-NEXT: retq
 ;
 ; X86-LABEL: foo:
 ; X86: # BB#0: # %entry
-; X86-NEXT: pushl %esi
+; X86-NEXT: pushl %edi
 ; X86-NEXT: .cfi_def_cfa_offset 8
-; X86-NEXT: .cfi_offset %esi, -8
+; X86-NEXT: pushl %esi
+; X86-NEXT: .cfi_def_cfa_offset 12
+; X86-NEXT: .cfi_offset %esi, -12
+; X86-NEXT: .cfi_offset %edi, -8
 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT: .p2align 4, 0x90
 ; X86-NEXT: .LBB0_1: # %loop
 ; X86-NEXT: # =>This Inner Loop Header: Depth=1
-; X86-NEXT: movl 16(%eax), %edx
-; X86-NEXT: movl (%eax), %esi
-; X86-NEXT: addl %edx, %esi
-; X86-NEXT: incl %esi
-; X86-NEXT: movl %esi, 12(%eax)
+; X86-NEXT: movl (%eax), %edx
+; X86-NEXT: movl 16(%eax), %esi
+; X86-NEXT: leal 1(%edx,%esi), %edi
+; X86-NEXT: movl %edi, 12(%eax)
 ; X86-NEXT: decl %ecx
 ; X86-NEXT: jne .LBB0_1
 ; X86-NEXT: # BB#2: # %exit
-; X86-NEXT: addl %edx, %esi
-; X86-NEXT: movl %esi, 16(%eax)
+; X86-NEXT: addl %esi, %edx
+; X86-NEXT: leal 1(%esi,%edx), %ecx
+; X86-NEXT: movl %ecx, 16(%eax)
 ; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
 ; X86-NEXT: retl
 entry:
   br label %loop
@@ -8,7 +8,7 @@ define i32 @foo(i32 %a, i32 %b) local_unnamed_addr #0 {
 ; X64-NEXT: # kill: %esi<def> %esi<kill> %rsi<def>
 ; X64-NEXT: # kill: %edi<def> %edi<kill> %rdi<def>
 ; X64-NEXT: leal 4(%rdi,%rsi,2), %ecx
-; X64-NEXT: leal (%ecx,%esi,2), %eax
+; X64-NEXT: leal 4(%rdi,%rsi,4), %eax
 ; X64-NEXT: imull %ecx, %eax
 ; X64-NEXT: retq
 ;
@@ -16,9 +16,9 @@ define i32 @foo(i32 %a, i32 %b) local_unnamed_addr #0 {
 ; X86: # BB#0: # %entry
 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: leal 4(%ecx,%eax,2), %ecx
-; X86-NEXT: leal (%ecx,%eax,2), %eax
-; X86-NEXT: imull %ecx, %eax
+; X86-NEXT: leal 4(%ecx,%eax,2), %edx
+; X86-NEXT: leal 4(%ecx,%eax,4), %eax
+; X86-NEXT: imull %edx, %eax
 ; X86-NEXT: retl
 entry:
   %mul = shl i32 %b, 1
@@ -36,7 +36,7 @@ define i32 @foo1(i32 %a, i32 %b) local_unnamed_addr #0 {
 ; X64-NEXT: # kill: %esi<def> %esi<kill> %rsi<def>
 ; X64-NEXT: # kill: %edi<def> %edi<kill> %rdi<def>
 ; X64-NEXT: leal 4(%rdi,%rsi,4), %ecx
-; X64-NEXT: leal (%ecx,%esi,4), %eax
+; X64-NEXT: leal 4(%rdi,%rsi,8), %eax
 ; X64-NEXT: imull %ecx, %eax
 ; X64-NEXT: retq
 ;
@@ -44,9 +44,9 @@ define i32 @foo1(i32 %a, i32 %b) local_unnamed_addr #0 {
 ; X86: # BB#0: # %entry
 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: leal 4(%ecx,%eax,4), %ecx
-; X86-NEXT: leal (%ecx,%eax,4), %eax
-; X86-NEXT: imull %ecx, %eax
+; X86-NEXT: leal 4(%ecx,%eax,4), %edx
+; X86-NEXT: leal 4(%ecx,%eax,8), %eax
+; X86-NEXT: imull %edx, %eax
 ; X86-NEXT: retl
 entry:
   %mul = shl i32 %b, 2
@@ -68,23 +68,29 @@ define i32 @foo1_mult_basic_blocks(i32 %a, i32 %b) local_unnamed_addr #0 {
 ; X64-NEXT: cmpl $10, %ecx
 ; X64-NEXT: je .LBB2_2
 ; X64-NEXT: # BB#1: # %mid
-; X64-NEXT: leal (%ecx,%esi,4), %eax
-; X64-NEXT: imull %ecx, %eax
+; X64-NEXT: leal 4(%rdi,%rsi,8), %eax
+; X64-NEXT: imull %eax, %ecx
+; X64-NEXT: movl %ecx, %eax
 ; X64-NEXT: .LBB2_2: # %exit
 ; X64-NEXT: retq
 ;
 ; X86-LABEL: foo1_mult_basic_blocks:
 ; X86: # BB#0: # %entry
+; X86-NEXT: pushl %esi
+; X86-NEXT: .cfi_def_cfa_offset 8
+; X86-NEXT: .cfi_offset %esi, -8
 ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: leal 4(%eax,%edx,4), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: leal 4(%esi,%edx,4), %ecx
 ; X86-NEXT: xorl %eax, %eax
 ; X86-NEXT: cmpl $10, %ecx
 ; X86-NEXT: je .LBB2_2
 ; X86-NEXT: # BB#1: # %mid
-; X86-NEXT: leal (%ecx,%edx,4), %eax
-; X86-NEXT: imull %ecx, %eax
+; X86-NEXT: leal 4(%esi,%edx,8), %eax
+; X86-NEXT: imull %eax, %ecx
+; X86-NEXT: movl %ecx, %eax
 ; X86-NEXT: .LBB2_2: # %exit
+; X86-NEXT: popl %esi
 ; X86-NEXT: retl
 entry:
 %mul = shl i32 %b, 2
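The foo/foo1 hunks above all revert the same transformation: the patched compiler derived the second address sum from the first LEA's result (e.g. leal (%ecx,%esi,2), %eax), while the restored code recomputes it with an independent three-operand LEA (leal 4(%rdi,%rsi,4), %eax). A minimal IR sketch of the pattern these tests exercise, reconstructed from the %mul = shl i32 %b, 1 line and the CHECK lines above (hypothetical function name, not the verbatim test body):

    define i32 @lea_cse_sketch(i32 %a, i32 %b) {
    entry:
      %mul = shl i32 %b, 1          ; 2*b
      %add = add i32 %a, %mul       ; a + 2*b
      %add1 = add i32 %add, 4       ; a + 2*b + 4 -> leal 4(%rdi,%rsi,2), %ecx
      %add2 = add i32 %add1, %mul   ; a + 4*b + 4 -> leal 4(%rdi,%rsi,4), %eax after the revert
      %res = mul i32 %add1, %add2   ; imull %ecx, %eax
      ret i32 %res
    }

The reverted patch had instead rewritten %add2 as %add1 + 2*b, reusing the first LEA at the cost of a longer dependence chain.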
@@ -1,31 +1,41 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+slow-3ops-lea | FileCheck %s -check-prefix=X64
-; RUN: llc < %s -mtriple=i686-unknown -mattr=+slow-3ops-lea | FileCheck %s -check-prefix=X86
+; RUN: llc < %s -mtriple=x86_64-unknown | FileCheck %s -check-prefix=X64
+; RUN: llc < %s -mtriple=i686-unknown | FileCheck %s -check-prefix=X86

 %struct.SA = type { i32 , i32 , i32 , i32 , i32};

 define void @foo(%struct.SA* nocapture %ctx, i32 %n) local_unnamed_addr #0 {
 ; X64-LABEL: foo:
 ; X64: # BB#0: # %entry
-; X64-NEXT: movl (%rdi), %eax
-; X64-NEXT: movl 16(%rdi), %ecx
-; X64-NEXT: leal (%rax,%rcx,4), %eax
-; X64-NEXT: addl $1, %eax
-; X64-NEXT: movl %eax, 12(%rdi)
-; X64-NEXT: addl %ecx, %eax
+; X64-NEXT: movl 16(%rdi), %eax
+; X64-NEXT: movl (%rdi), %ecx
+; X64-NEXT: addl %eax, %ecx
+; X64-NEXT: addl %eax, %ecx
+; X64-NEXT: addl %eax, %ecx
+; X64-NEXT: leal (%rcx,%rax), %edx
+; X64-NEXT: leal 1(%rax,%rcx), %ecx
+; X64-NEXT: movl %ecx, 12(%rdi)
+; X64-NEXT: leal 1(%rax,%rdx), %eax
 ; X64-NEXT: movl %eax, 16(%rdi)
 ; X64-NEXT: retq
 ;
 ; X86-LABEL: foo:
 ; X86: # BB#0: # %entry
+; X86-NEXT: pushl %esi
+; X86-NEXT: .cfi_def_cfa_offset 8
+; X86-NEXT: .cfi_offset %esi, -8
 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl (%eax), %ecx
-; X86-NEXT: movl 16(%eax), %edx
-; X86-NEXT: leal (%ecx,%edx,4), %ecx
-; X86-NEXT: addl $1, %ecx
-; X86-NEXT: movl %ecx, 12(%eax)
-; X86-NEXT: addl %edx, %ecx
+; X86-NEXT: movl 16(%eax), %ecx
+; X86-NEXT: movl (%eax), %edx
+; X86-NEXT: addl %ecx, %edx
+; X86-NEXT: addl %ecx, %edx
+; X86-NEXT: addl %ecx, %edx
+; X86-NEXT: leal 1(%ecx,%edx), %esi
+; X86-NEXT: addl %ecx, %edx
+; X86-NEXT: movl %esi, 12(%eax)
+; X86-NEXT: leal 1(%ecx,%edx), %ecx
 ; X86-NEXT: movl %ecx, 16(%eax)
+; X86-NEXT: popl %esi
 ; X86-NEXT: retl
 entry:
 %h0 = getelementptr inbounds %struct.SA, %struct.SA* %ctx, i64 0, i32 0
@@ -52,15 +62,15 @@ define void @foo_loop(%struct.SA* nocapture %ctx, i32 %n) local_unnamed_addr #0
 ; X64-NEXT: .p2align 4, 0x90
 ; X64-NEXT: .LBB1_1: # %loop
 ; X64-NEXT: # =>This Inner Loop Header: Depth=1
-; X64-NEXT: movl 16(%rdi), %eax
 ; X64-NEXT: movl (%rdi), %ecx
-; X64-NEXT: addl %eax, %ecx
-; X64-NEXT: incl %ecx
-; X64-NEXT: movl %ecx, 12(%rdi)
+; X64-NEXT: movl 16(%rdi), %eax
+; X64-NEXT: leal 1(%rcx,%rax), %edx
+; X64-NEXT: movl %edx, 12(%rdi)
 ; X64-NEXT: decl %esi
 ; X64-NEXT: jne .LBB1_1
 ; X64-NEXT: # BB#2: # %exit
 ; X64-NEXT: addl %eax, %ecx
+; X64-NEXT: leal 1(%rax,%rcx), %ecx
 ; X64-NEXT: addl %eax, %ecx
 ; X64-NEXT: addl %eax, %ecx
 ; X64-NEXT: addl %eax, %ecx
@@ -72,23 +82,26 @@ define void @foo_loop(%struct.SA* nocapture %ctx, i32 %n) local_unnamed_addr #0
 ;
 ; X86-LABEL: foo_loop:
 ; X86: # BB#0: # %entry
-; X86-NEXT: pushl %esi
+; X86-NEXT: pushl %edi
 ; X86-NEXT: .cfi_def_cfa_offset 8
-; X86-NEXT: .cfi_offset %esi, -8
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: pushl %esi
+; X86-NEXT: .cfi_def_cfa_offset 12
+; X86-NEXT: .cfi_offset %esi, -12
+; X86-NEXT: .cfi_offset %edi, -8
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT: .p2align 4, 0x90
 ; X86-NEXT: .LBB1_1: # %loop
 ; X86-NEXT: # =>This Inner Loop Header: Depth=1
+; X86-NEXT: movl (%eax), %esi
 ; X86-NEXT: movl 16(%eax), %ecx
-; X86-NEXT: movl (%eax), %edx
-; X86-NEXT: addl %ecx, %edx
-; X86-NEXT: incl %edx
-; X86-NEXT: movl %edx, 12(%eax)
-; X86-NEXT: decl %esi
+; X86-NEXT: leal 1(%esi,%ecx), %edi
+; X86-NEXT: movl %edi, 12(%eax)
+; X86-NEXT: decl %edx
 ; X86-NEXT: jne .LBB1_1
 ; X86-NEXT: # BB#2: # %exit
-; X86-NEXT: addl %ecx, %edx
+; X86-NEXT: addl %ecx, %esi
+; X86-NEXT: leal 1(%ecx,%esi), %edx
 ; X86-NEXT: addl %ecx, %edx
 ; X86-NEXT: addl %ecx, %edx
@@ -97,6 +110,7 @@ define void @foo_loop(%struct.SA* nocapture %ctx, i32 %n) local_unnamed_addr #0
 ; X86-NEXT: addl %ecx, %edx
 ; X86-NEXT: movl %edx, 16(%eax)
 ; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
 ; X86-NEXT: retl
 entry:
 br label %loop
@@ -558,10 +558,11 @@ define i16 @test_mul_by_28(i16 %x) {
 define i16 @test_mul_by_29(i16 %x) {
 ; X86-LABEL: test_mul_by_29:
 ; X86: # BB#0:
-; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: leal (%eax,%eax,8), %ecx
-; X86-NEXT: leal (%ecx,%ecx,2), %ecx
-; X86-NEXT: leal (%ecx,%eax,2), %eax
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: leal (%ecx,%ecx,8), %eax
+; X86-NEXT: leal (%eax,%eax,2), %eax
+; X86-NEXT: addl %ecx, %eax
+; X86-NEXT: addl %ecx, %eax
 ; X86-NEXT: # kill: %ax<def> %ax<kill> %eax<kill>
 ; X86-NEXT: retl
 ;
@@ -570,7 +571,8 @@ define i16 @test_mul_by_29(i16 %x) {
 ; X64-NEXT: # kill: %edi<def> %edi<kill> %rdi<def>
 ; X64-NEXT: leal (%rdi,%rdi,8), %eax
 ; X64-NEXT: leal (%rax,%rax,2), %eax
-; X64-NEXT: leal (%rax,%rdi,2), %eax
+; X64-NEXT: addl %edi, %eax
+; X64-NEXT: addl %edi, %eax
 ; X64-NEXT: # kill: %ax<def> %ax<kill> %eax<kill>
 ; X64-NEXT: retq
 %mul = mul nsw i16 %x, 29
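The test_mul_by_29 hunks here and below all show the same decomposition of x*29 as ((x*9)*3) + x + x: both sides keep the two scaled LEAs for x*9 and x*27, but the reverted patch folded the final two adds into one leal (%rax,%rdi,2) (i.e. 27*x + 2*x), whereas the restored code emits two plain adds. A sketch of the decomposition in IR (hypothetical function and value names; the actual test contains only the single mul nsw ... 29 shown above):

    define i16 @mul_by_29_sketch(i16 %x) {
      %m9 = mul i16 %x, 9     ; leal (%rdi,%rdi,8), %eax
      %m27 = mul i16 %m9, 3   ; leal (%rax,%rax,2), %eax
      %a1 = add i16 %m27, %x  ; addl %edi, %eax
      %a2 = add i16 %a1, %x   ; addl %edi, %eax
      ret i16 %a2
    }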
@@ -1457,10 +1457,11 @@ define i32 @test_mul_by_28(i32 %x) {
 define i32 @test_mul_by_29(i32 %x) {
 ; X86-LABEL: test_mul_by_29:
 ; X86: # BB#0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: leal (%eax,%eax,8), %ecx
-; X86-NEXT: leal (%ecx,%ecx,2), %ecx
-; X86-NEXT: leal (%ecx,%eax,2), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: leal (%ecx,%ecx,8), %eax
+; X86-NEXT: leal (%eax,%eax,2), %eax
+; X86-NEXT: addl %ecx, %eax
+; X86-NEXT: addl %ecx, %eax
 ; X86-NEXT: retl
 ;
 ; X64-HSW-LABEL: test_mul_by_29:
@@ -1468,7 +1469,8 @@ define i32 @test_mul_by_29(i32 %x) {
 ; X64-HSW-NEXT: # kill: %edi<def> %edi<kill> %rdi<def>
 ; X64-HSW-NEXT: leal (%rdi,%rdi,8), %eax # sched: [1:0.50]
 ; X64-HSW-NEXT: leal (%rax,%rax,2), %eax # sched: [1:0.50]
-; X64-HSW-NEXT: leal (%rax,%rdi,2), %eax # sched: [1:0.50]
+; X64-HSW-NEXT: addl %edi, %eax # sched: [1:0.25]
+; X64-HSW-NEXT: addl %edi, %eax # sched: [1:0.25]
 ; X64-HSW-NEXT: retq # sched: [2:1.00]
 ;
 ; X64-JAG-LABEL: test_mul_by_29:
@@ -1476,7 +1478,8 @@ define i32 @test_mul_by_29(i32 %x) {
 ; X64-JAG-NEXT: # kill: %edi<def> %edi<kill> %rdi<def>
 ; X64-JAG-NEXT: leal (%rdi,%rdi,8), %eax # sched: [1:0.50]
 ; X64-JAG-NEXT: leal (%rax,%rax,2), %eax # sched: [1:0.50]
-; X64-JAG-NEXT: leal (%rax,%rdi,2), %eax # sched: [1:0.50]
+; X64-JAG-NEXT: addl %edi, %eax # sched: [1:0.50]
+; X64-JAG-NEXT: addl %edi, %eax # sched: [1:0.50]
 ; X64-JAG-NEXT: retq # sched: [4:1.00]
 ;
 ; X86-NOOPT-LABEL: test_mul_by_29:
@@ -1523,7 +1523,8 @@ define i64 @test_mul_by_29(i64 %x) {
 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT: leal (%eax,%eax,8), %ecx
 ; X86-NEXT: leal (%ecx,%ecx,2), %ecx
-; X86-NEXT: leal (%ecx,%eax,2), %ecx
+; X86-NEXT: addl %eax, %ecx
+; X86-NEXT: addl %eax, %ecx
 ; X86-NEXT: movl $29, %eax
 ; X86-NEXT: mull {{[0-9]+}}(%esp)
 ; X86-NEXT: addl %ecx, %edx
|
||||||
; X64-HSW: # BB#0:
|
; X64-HSW: # BB#0:
|
||||||
; X64-HSW-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [1:0.50]
|
; X64-HSW-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [1:0.50]
|
||||||
; X64-HSW-NEXT: leaq (%rax,%rax,2), %rax # sched: [1:0.50]
|
; X64-HSW-NEXT: leaq (%rax,%rax,2), %rax # sched: [1:0.50]
|
||||||
; X64-HSW-NEXT: leaq (%rax,%rdi,2), %rax # sched: [1:0.50]
|
; X64-HSW-NEXT: addq %rdi, %rax # sched: [1:0.25]
|
||||||
|
; X64-HSW-NEXT: addq %rdi, %rax # sched: [1:0.25]
|
||||||
; X64-HSW-NEXT: retq # sched: [2:1.00]
|
; X64-HSW-NEXT: retq # sched: [2:1.00]
|
||||||
;
|
;
|
||||||
; X64-JAG-LABEL: test_mul_by_29:
|
; X64-JAG-LABEL: test_mul_by_29:
|
||||||
; X64-JAG: # BB#0:
|
; X64-JAG: # BB#0:
|
||||||
; X64-JAG-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [1:0.50]
|
; X64-JAG-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [1:0.50]
|
||||||
; X64-JAG-NEXT: leaq (%rax,%rax,2), %rax # sched: [1:0.50]
|
; X64-JAG-NEXT: leaq (%rax,%rax,2), %rax # sched: [1:0.50]
|
||||||
; X64-JAG-NEXT: leaq (%rax,%rdi,2), %rax # sched: [1:0.50]
|
; X64-JAG-NEXT: addq %rdi, %rax # sched: [1:0.50]
|
||||||
|
; X64-JAG-NEXT: addq %rdi, %rax # sched: [1:0.50]
|
||||||
; X64-JAG-NEXT: retq # sched: [4:1.00]
|
; X64-JAG-NEXT: retq # sched: [4:1.00]
|
||||||
;
|
;
|
||||||
; X86-NOOPT-LABEL: test_mul_by_29:
|
; X86-NOOPT-LABEL: test_mul_by_29:
|
||||||
|
|
|
@@ -164,7 +164,8 @@ define i32 @mult(i32, i32) local_unnamed_addr #0 {
 ; X86-NEXT: .LBB0_35:
 ; X86-NEXT: leal (%eax,%eax,8), %ecx
 ; X86-NEXT: leal (%ecx,%ecx,2), %ecx
-; X86-NEXT: leal (%ecx,%eax,2), %eax
+; X86-NEXT: addl %eax, %ecx
+; X86-NEXT: addl %ecx, %eax
 ; X86-NEXT: popl %esi
 ; X86-NEXT: retl
 ; X86-NEXT: .LBB0_36:
@@ -322,15 +323,14 @@ define i32 @mult(i32, i32) local_unnamed_addr #0 {
 ; X64-HSW-NEXT: .LBB0_31:
 ; X64-HSW-NEXT: leal (%rax,%rax,8), %ecx
 ; X64-HSW-NEXT: leal (%rcx,%rcx,2), %ecx
-; X64-HSW-NEXT: .LBB0_17:
-; X64-HSW-NEXT: addl %eax, %ecx
-; X64-HSW-NEXT: movl %ecx, %eax
-; X64-HSW-NEXT: # kill: %eax<def> %eax<kill> %rax<kill>
-; X64-HSW-NEXT: retq
+; X64-HSW-NEXT: jmp .LBB0_17
 ; X64-HSW-NEXT: .LBB0_32:
 ; X64-HSW-NEXT: leal (%rax,%rax,8), %ecx
 ; X64-HSW-NEXT: leal (%rcx,%rcx,2), %ecx
-; X64-HSW-NEXT: leal (%rcx,%rax,2), %eax
+; X64-HSW-NEXT: addl %eax, %ecx
+; X64-HSW-NEXT: .LBB0_17:
+; X64-HSW-NEXT: addl %eax, %ecx
+; X64-HSW-NEXT: movl %ecx, %eax
 ; X64-HSW-NEXT: # kill: %eax<def> %eax<kill> %rax<kill>
 ; X64-HSW-NEXT: retq
 ; X64-HSW-NEXT: .LBB0_33:
@@ -13,14 +13,14 @@
 ; X64-NEXT: .p2align
 ; X64: %loop
 ; no complex address modes
-; X64-NOT: [1-9]+(%{{[^)]+}},%{{[^)]+}},
+; X64-NOT: (%{{[^)]+}},%{{[^)]+}},
 ;
 ; X32: @simple
 ; no expensive address computation in the preheader
 ; X32-NOT: imul
 ; X32: %loop
 ; no complex address modes
-; X32-NOT: [1-9]+(%{{[^)]+}},%{{[^)]+}},
+; X32-NOT: (%{{[^)]+}},%{{[^)]+}},
 define i32 @simple(i32* %a, i32* %b, i32 %x) nounwind {
 entry:
 br label %loop
@@ -103,7 +103,7 @@ exit:
 ; X32-NOT: mov{{.*}}(%esp){{$}}
 ; X32: %for.body{{$}}
 ; no complex address modes
-; X32-NOT: [1-9]+(%{{[^)]+}},%{{[^)]+}},
+; X32-NOT: (%{{[^)]+}},%{{[^)]+}},
 ; no reloads
 ; X32-NOT: (%esp)
 define void @extrastride(i8* nocapture %main, i32 %main_stride, i32* nocapture %res, i32 %x, i32 %y, i32 %z) nounwind {
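A note on the tightened CHECK-NOT regexes in the last two hunks: FileCheck patterns match anywhere on a line, so the restored pattern (%{{[^)]+}},%{{[^)]+}}, rejects any base+index scaled memory operand, with or without a displacement, while the patched [1-9]+(... form only rejected operands carrying a nonzero literal displacement. For example (hypothetical instructions, not taken from the test output):

    movzbl 4(%esi,%ecx,1), %eax    ; matched (rejected) by both patterns
    movzbl (%esi,%ecx,1), %eax     ; matched only by the restored, broader pattern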