From 9e658c974b0d2ad0a68be4bd32a15c2f28200b52 Mon Sep 17 00:00:00 2001 From: Matt Morehouse Date: Fri, 1 Dec 2017 22:20:26 +0000 Subject: [PATCH] Revert "[X86] Improvement in CodeGen instruction selection for LEAs." This reverts r319543, due to ASan bot breakage. llvm-svn: 319591 --- llvm/include/llvm/CodeGen/MachineInstr.h | 3 +- llvm/include/llvm/CodeGen/SelectionDAG.h | 3 - .../CodeGen/SelectionDAG/SelectionDAGISel.cpp | 11 - llvm/lib/Target/X86/X86ISelDAGToDAG.cpp | 88 +--- llvm/lib/Target/X86/X86OptimizeLEAs.cpp | 486 +----------------- .../CodeGen/X86/GlobalISel/callingconv.ll | 2 +- llvm/test/CodeGen/X86/GlobalISel/gep.ll | 34 +- .../CodeGen/X86/GlobalISel/memop-scalar.ll | 2 +- llvm/test/CodeGen/X86/lea-opt-cse1.ll | 12 +- llvm/test/CodeGen/X86/lea-opt-cse2.ll | 40 +- llvm/test/CodeGen/X86/lea-opt-cse3.ll | 34 +- llvm/test/CodeGen/X86/lea-opt-cse4.ll | 68 ++- llvm/test/CodeGen/X86/mul-constant-i16.ll | 12 +- llvm/test/CodeGen/X86/mul-constant-i32.ll | 15 +- llvm/test/CodeGen/X86/mul-constant-i64.ll | 9 +- llvm/test/CodeGen/X86/mul-constant-result.ll | 14 +- llvm/test/CodeGen/X86/umul-with-overflow.ll | 16 +- .../LoopStrengthReduce/X86/ivchain-X86.ll | 6 +- 18 files changed, 163 insertions(+), 692 deletions(-) diff --git a/llvm/include/llvm/CodeGen/MachineInstr.h b/llvm/include/llvm/CodeGen/MachineInstr.h index c741ce622cd0..6c899ca7ee09 100644 --- a/llvm/include/llvm/CodeGen/MachineInstr.h +++ b/llvm/include/llvm/CodeGen/MachineInstr.h @@ -1320,13 +1320,12 @@ public: /// Add all implicit def and use operands to this instruction. void addImplicitDefUseOperands(MachineFunction &MF); +private: /// If this instruction is embedded into a MachineFunction, return the /// MachineRegisterInfo object for the current function, otherwise /// return null. MachineRegisterInfo *getRegInfo(); -private: - /// Unlink all of the register operands in this instruction from their /// respective use lists. This requires that the operands already be on their /// use lists. diff --git a/llvm/include/llvm/CodeGen/SelectionDAG.h b/llvm/include/llvm/CodeGen/SelectionDAG.h index 090d0e08a783..82f507e64b9e 100644 --- a/llvm/include/llvm/CodeGen/SelectionDAG.h +++ b/llvm/include/llvm/CodeGen/SelectionDAG.h @@ -300,9 +300,6 @@ public: /// type legalization. bool NewNodesMustHaveLegalTypes = false; - /// Set to true for DAG of BasicBlock contained inside a loop. - bool IsDAGPartOfLoop = false; - private: /// DAGUpdateListener is a friend so it can manipulate the listener stack. friend struct DAGUpdateListener; diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index 28e89b1be5c6..cb37137d547f 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -27,7 +27,6 @@ #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/BranchProbabilityInfo.h" #include "llvm/Analysis/CFG.h" -#include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/CodeGen/FastISel.h" @@ -326,8 +325,6 @@ void SelectionDAGISel::getAnalysisUsage(AnalysisUsage &AU) const { if (OptLevel != CodeGenOpt::None) AU.addRequired(); AU.addRequired(); - if (OptLevel != CodeGenOpt::None) - AU.addRequired(); AU.addRequired(); AU.addPreserved(); AU.addPreserved(); @@ -1418,7 +1415,6 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { // Iterate over all basic blocks in the function. for (const BasicBlock *LLVMBB : RPOT) { - CurDAG->IsDAGPartOfLoop = false; if (OptLevel != CodeGenOpt::None) { bool AllPredsVisited = true; for (const_pred_iterator PI = pred_begin(LLVMBB), PE = pred_end(LLVMBB); @@ -1596,13 +1592,6 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { FunctionBasedInstrumentation); } - if (OptLevel != CodeGenOpt::None) { - auto &LIWP = getAnalysis(); - LoopInfo &LI = LIWP.getLoopInfo(); - if (LI.getLoopFor(LLVMBB)) - CurDAG->IsDAGPartOfLoop = true; - } - if (Begin != BI) ++NumDAGBlocks; else diff --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp index b74eaf0daf52..93a3b9281d9c 100644 --- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -88,11 +88,6 @@ namespace { IndexReg.getNode() != nullptr || Base_Reg.getNode() != nullptr; } - bool hasComplexAddressingMode() const { - return Disp && IndexReg.getNode() != nullptr && - Base_Reg.getNode() != nullptr; - } - /// Return true if this addressing mode is already RIP-relative. bool isRIPRelative() const { if (BaseType != RegBase) return false; @@ -102,10 +97,6 @@ namespace { return false; } - bool isLegalScale() const { - return (Scale == 1 || Scale == 2 || Scale == 4 || Scale == 8); - } - void setBaseReg(SDValue Reg) { BaseType = RegBase; Base_Reg = Reg; @@ -171,13 +162,10 @@ namespace { /// If true, selector should try to optimize for minimum code size. bool OptForMinSize; - /// If true, selector should try to aggresively fold operands into AM. - bool OptForAggressingFolding; - public: explicit X86DAGToDAGISel(X86TargetMachine &tm, CodeGenOpt::Level OptLevel) : SelectionDAGISel(tm, OptLevel), OptForSize(false), - OptForMinSize(false), OptForAggressingFolding(false) {} + OptForMinSize(false) {} StringRef getPassName() const override { return "X86 DAG->DAG Instruction Selection"; @@ -196,12 +184,6 @@ namespace { void PreprocessISelDAG() override; - void setAggressiveOperandFolding(bool val) { - OptForAggressingFolding = val; - } - - bool getAggressiveOperandFolding() { return OptForAggressingFolding; } - // Include the pieces autogenerated from the target description. #include "X86GenDAGISel.inc" @@ -216,7 +198,6 @@ namespace { bool matchAdd(SDValue N, X86ISelAddressMode &AM, unsigned Depth); bool matchAddressRecursively(SDValue N, X86ISelAddressMode &AM, unsigned Depth); - bool matchAddressLEA(SDValue N, X86ISelAddressMode &AM); bool matchAddressBase(SDValue N, X86ISelAddressMode &AM); bool selectAddr(SDNode *Parent, SDValue N, SDValue &Base, SDValue &Scale, SDValue &Index, SDValue &Disp, @@ -466,20 +447,6 @@ namespace { bool isMaskZeroExtended(SDNode *N) const; }; - - class X86AggressiveOperandFolding { - public: - explicit X86AggressiveOperandFolding(X86DAGToDAGISel &ISel, bool val) - : Selector(&ISel) { - Selector->setAggressiveOperandFolding(val); - } - ~X86AggressiveOperandFolding() { - Selector->setAggressiveOperandFolding(false); - } - - private: - X86DAGToDAGISel *Selector; - }; } @@ -1227,7 +1194,7 @@ static bool foldMaskAndShiftToScale(SelectionDAG &DAG, SDValue N, AM.IndexReg = NewSRL; return false; } - + bool X86DAGToDAGISel::matchAddressRecursively(SDValue N, X86ISelAddressMode &AM, unsigned Depth) { SDLoc dl(N); @@ -1235,14 +1202,8 @@ bool X86DAGToDAGISel::matchAddressRecursively(SDValue N, X86ISelAddressMode &AM, dbgs() << "MatchAddress: "; AM.dump(); }); - - // Limit recursion. For aggressive operand folding recurse - // till depth 8 which is the maximum legal scale value. - auto getMaxOperandFoldingDepth = [&] () -> unsigned int { - return getAggressiveOperandFolding() ? 8 : 5; - }; - - if (Depth > getMaxOperandFoldingDepth()) + // Limit recursion. + if (Depth > 5) return matchAddressBase(N, AM); // If this is already a %rip relative address, we can only merge immediates @@ -1533,20 +1494,6 @@ bool X86DAGToDAGISel::matchAddressBase(SDValue N, X86ISelAddressMode &AM) { return false; } - if (OptLevel != CodeGenOpt::None && getAggressiveOperandFolding() && - AM.BaseType == X86ISelAddressMode::RegBase) { - if (AM.Base_Reg == N) { - SDValue Base_Reg = AM.Base_Reg; - AM.Base_Reg = AM.IndexReg; - AM.IndexReg = Base_Reg; - AM.Scale++; - return false; - } else if (AM.IndexReg == N) { - AM.Scale++; - return false; - } - } - // Otherwise, we cannot select it. return true; } @@ -1782,7 +1729,7 @@ bool X86DAGToDAGISel::selectLEA64_32Addr(SDValue N, SDValue &Base, SDValue &Disp, SDValue &Segment) { // Save the debug loc before calling selectLEAAddr, in case it invalidates N. SDLoc DL(N); - + if (!selectLEAAddr(N, Base, Scale, Index, Disp, Segment)) return false; @@ -1817,29 +1764,6 @@ bool X86DAGToDAGISel::selectLEA64_32Addr(SDValue N, SDValue &Base, return true; } -bool X86DAGToDAGISel::matchAddressLEA(SDValue N, X86ISelAddressMode &AM) { - // Avoid enabling aggressive operand folding when node N is a part of loop. - X86AggressiveOperandFolding Enable(*this, !CurDAG->IsDAGPartOfLoop); - - bool matchRes = matchAddress(N, AM); - - // Check for legality of scale when recursion unwinds back to the top. - if (!matchRes) { - if (!AM.isLegalScale()) - return true; - - // Avoid creating costly complex LEAs having scale less than 2 - // within loop. - if(CurDAG->IsDAGPartOfLoop && Subtarget->slow3OpsLEA() && - AM.Scale <= 2 && AM.hasComplexAddressingMode() && - (!AM.hasSymbolicDisplacement() && N.getOpcode() < ISD::BUILTIN_OP_END)) - return true; - } - - return matchRes; -} - - /// Calls SelectAddr and determines if the maximal addressing /// mode it matches can be cost effectively emitted as an LEA instruction. bool X86DAGToDAGISel::selectLEAAddr(SDValue N, @@ -1857,7 +1781,7 @@ bool X86DAGToDAGISel::selectLEAAddr(SDValue N, SDValue Copy = AM.Segment; SDValue T = CurDAG->getRegister(0, MVT::i32); AM.Segment = T; - if (matchAddressLEA(N, AM)) + if (matchAddress(N, AM)) return false; assert (T == AM.Segment); AM.Segment = Copy; diff --git a/llvm/lib/Target/X86/X86OptimizeLEAs.cpp b/llvm/lib/Target/X86/X86OptimizeLEAs.cpp index 7783c116400e..cc136866c479 100644 --- a/llvm/lib/Target/X86/X86OptimizeLEAs.cpp +++ b/llvm/lib/Target/X86/X86OptimizeLEAs.cpp @@ -27,7 +27,6 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/MachineBasicBlock.h" -#include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstr.h" @@ -59,7 +58,6 @@ static cl::opt cl::init(false)); STATISTIC(NumSubstLEAs, "Number of LEA instruction substitutions"); -STATISTIC(NumFactoredLEAs, "Number of LEAs factorized"); STATISTIC(NumRedundantLEAs, "Number of redundant LEA instructions removed"); /// \brief Returns true if two machine operands are identical and they are not @@ -67,10 +65,6 @@ STATISTIC(NumRedundantLEAs, "Number of redundant LEA instructions removed"); static inline bool isIdenticalOp(const MachineOperand &MO1, const MachineOperand &MO2); -/// \brief Returns true if two machine instructions have identical operands. -static bool isIdenticalMI(MachineRegisterInfo *MRI, const MachineOperand &MO1, - const MachineOperand &MO2); - /// \brief Returns true if two address displacement operands are of the same /// type and use the same symbol/index/address regardless of the offset. static bool isSimilarDispOp(const MachineOperand &MO1, @@ -79,9 +73,6 @@ static bool isSimilarDispOp(const MachineOperand &MO1, /// \brief Returns true if the instruction is LEA. static inline bool isLEA(const MachineInstr &MI); -/// \brief Returns true if Definition of Operand is a copylike instruction. -static bool isDefCopyLike(MachineRegisterInfo *MRI, const MachineOperand &Opr); - namespace { /// A key based on instruction's memory operands. @@ -89,35 +80,15 @@ class MemOpKey { public: MemOpKey(const MachineOperand *Base, const MachineOperand *Scale, const MachineOperand *Index, const MachineOperand *Segment, - const MachineOperand *Disp, bool DispCheck = false) - : Disp(Disp), DeepCheck(DispCheck) { + const MachineOperand *Disp) + : Disp(Disp) { Operands[0] = Base; Operands[1] = Scale; Operands[2] = Index; Operands[3] = Segment; } - /// Checks operands of MemOpKey are identical, if Base or Index - /// operand definitions are of kind SUBREG_TO_REG then compare - /// operands of defining MI. - bool performDeepCheck(const MemOpKey &Other) const { - MachineInstr *MI = const_cast(Operands[0]->getParent()); - MachineRegisterInfo *MRI = MI->getRegInfo(); - - for (int i = 0; i < 4; i++) { - bool CopyLike = isDefCopyLike(MRI, *Operands[i]); - if (CopyLike && !isIdenticalMI(MRI, *Operands[i], *Other.Operands[i])) - return false; - else if (!CopyLike && !isIdenticalOp(*Operands[i], *Other.Operands[i])) - return false; - } - return isIdenticalOp(*Disp, *Other.Disp); - } - bool operator==(const MemOpKey &Other) const { - if (DeepCheck) - return performDeepCheck(Other); - // Addresses' bases, scales, indices and segments must be identical. for (int i = 0; i < 4; ++i) if (!isIdenticalOp(*Operands[i], *Other.Operands[i])) @@ -135,12 +106,6 @@ public: // Address' displacement operand. const MachineOperand *Disp; - - // If true checks Address' base, index, segment and - // displacement are identical, in additions if base/index - // are defined by copylike instruction then futher - // compare the operands of the defining instruction. - bool DeepCheck; }; } // end anonymous namespace @@ -166,34 +131,12 @@ template <> struct DenseMapInfo { static unsigned getHashValue(const MemOpKey &Val) { // Checking any field of MemOpKey is enough to determine if the key is // empty or tombstone. - hash_code Hash(0); assert(Val.Disp != PtrInfo::getEmptyKey() && "Cannot hash the empty key"); assert(Val.Disp != PtrInfo::getTombstoneKey() && "Cannot hash the tombstone key"); - auto getMIHash = [](MachineInstr *MI) -> hash_code { - hash_code h(0); - for (unsigned i = 1, e = MI->getNumOperands(); i < e; i++) - h = hash_combine(h, MI->getOperand(i)); - return h; - }; - - const MachineOperand &Base = *Val.Operands[0]; - const MachineOperand &Index = *Val.Operands[2]; - MachineInstr *MI = const_cast(Base.getParent()); - MachineRegisterInfo *MRI = MI->getRegInfo(); - - if (isDefCopyLike(MRI, Base)) - Hash = getMIHash(MRI->getVRegDef(Base.getReg())); - else - Hash = hash_combine(Hash, Base); - - if (isDefCopyLike(MRI, Index)) - Hash = getMIHash(MRI->getVRegDef(Index.getReg())); - else - Hash = hash_combine(Hash, Index); - - Hash = hash_combine(Hash, *Val.Operands[1], *Val.Operands[3]); + hash_code Hash = hash_combine(*Val.Operands[0], *Val.Operands[1], + *Val.Operands[2], *Val.Operands[3]); // If the address displacement is an immediate, it should not affect the // hash so that memory operands which differ only be immediate displacement @@ -253,16 +196,6 @@ static inline MemOpKey getMemOpKey(const MachineInstr &MI, unsigned N) { &MI.getOperand(N + X86::AddrDisp)); } -static inline MemOpKey getMemOpCSEKey(const MachineInstr &MI, unsigned N) { - static MachineOperand DummyScale = MachineOperand::CreateImm(1); - assert((isLEA(MI) || MI.mayLoadOrStore()) && - "The instruction must be a LEA, a load or a store"); - return MemOpKey(&MI.getOperand(N + X86::AddrBaseReg), &DummyScale, - &MI.getOperand(N + X86::AddrIndexReg), - &MI.getOperand(N + X86::AddrSegmentReg), - &MI.getOperand(N + X86::AddrDisp), true); -} - static inline bool isIdenticalOp(const MachineOperand &MO1, const MachineOperand &MO2) { return MO1.isIdenticalTo(MO2) && @@ -270,27 +203,6 @@ static inline bool isIdenticalOp(const MachineOperand &MO1, !TargetRegisterInfo::isPhysicalRegister(MO1.getReg())); } -static bool isIdenticalMI(MachineRegisterInfo *MRI, const MachineOperand &MO1, - const MachineOperand &MO2) { - MachineInstr *MI1 = nullptr; - MachineInstr *MI2 = nullptr; - if (!MO1.isReg() || !MO2.isReg()) - return false; - - MI1 = MRI->getVRegDef(MO1.getReg()); - MI2 = MRI->getVRegDef(MO2.getReg()); - if (!MI1 || !MI2) - return false; - if (MI1->getOpcode() != MI2->getOpcode()) - return false; - if (MI1->getNumOperands() != MI2->getNumOperands()) - return false; - for (unsigned i = 1, e = MI1->getNumOperands(); i < e; ++i) - if (!isIdenticalOp(MI1->getOperand(i), MI2->getOperand(i))) - return false; - return true; -} - #ifndef NDEBUG static bool isValidDispOp(const MachineOperand &MO) { return MO.isImm() || MO.isCPI() || MO.isJTI() || MO.isSymbol() || @@ -322,150 +234,8 @@ static inline bool isLEA(const MachineInstr &MI) { Opcode == X86::LEA64r || Opcode == X86::LEA64_32r; } -static bool isDefCopyLike(MachineRegisterInfo *MRI, const MachineOperand &Opr) { - bool isInstrErased = !(Opr.isReg() && Opr.getParent()->getParent()); - if (!Opr.isReg() || isInstrErased || - TargetRegisterInfo::isPhysicalRegister(Opr.getReg())) - return false; - MachineInstr *MI = MRI->getVRegDef(Opr.getReg()); - return MI && MI->isCopyLike(); -} - namespace { -/// This class captures the functions and attributes -/// needed to factorize LEA within and across basic -/// blocks.LEA instruction with same BASE,OFFSET and -/// INDEX are the candidates for factorization. -class FactorizeLEAOpt { -public: - using LEAListT = std::list; - using LEAMapT = DenseMap; - using ValueT = DenseMap; - using ScopeEntryT = std::pair; - using ScopeStackT = std::vector; - - FactorizeLEAOpt() = default; - FactorizeLEAOpt(const FactorizeLEAOpt &) = delete; - FactorizeLEAOpt &operator=(const FactorizeLEAOpt &) = delete; - - void performCleanup() { - for (auto LEA : removedLEAs) - LEA->eraseFromParent(); - LEAs.clear(); - Stack.clear(); - removedLEAs.clear(); - } - - LEAMapT &getLEAMap() { return LEAs; } - ScopeEntryT *getTopScope() { return &Stack.back(); } - - void addForLazyRemoval(MachineInstr *Instr) { removedLEAs.insert(Instr); } - - bool checkIfScheduledForRemoval(MachineInstr *Instr) { - return removedLEAs.find(Instr) != removedLEAs.end(); - } - - /// Push the ScopeEntry for the BasicBlock over Stack. - /// Also traverses over list of instruction and update - /// LEAs Map and ScopeEntry for each LEA instruction - /// found using insertLEA(). - void collectDataForBasicBlock(MachineBasicBlock *MBB); - - /// Stores the size of MachineInstr list corrosponding - /// to key K from LEAs MAP into the ScopeEntry of - /// the basic block, then insert the LEA at the beginning - /// of the list. - void insertLEA(MachineInstr *MI); - - /// Pops out ScopeEntry of top most BasicBlock from the stack - /// and remove the LEA instructions contained in the scope - /// from the LEAs Map. - void removeDataForBasicBlock(); - - /// If LEA contains Physical Registers then its not a candidate - /// for factorizations since physical registers may violate SSA - /// semantics of MI. - bool containsPhyReg(MachineInstr *MI, unsigned RecLevel); - -private: - ScopeStackT Stack; - LEAMapT LEAs; - std::set removedLEAs; -}; - -void FactorizeLEAOpt::collectDataForBasicBlock(MachineBasicBlock *MBB) { - ValueT EmptyMap; - ScopeEntryT SE = std::make_pair(MBB, EmptyMap); - Stack.push_back(SE); - for (auto &MI : *MBB) { - if (isLEA(MI)) - insertLEA(&MI); - } -} - -void FactorizeLEAOpt::removeDataForBasicBlock() { - ScopeEntryT &SE = Stack.back(); - for (auto MapEntry : SE.second) { - LEAMapT::iterator Itr = LEAs.find(MapEntry.first); - assert((Itr != LEAs.end()) && - "LEAs map must have a node corresponding to ScopeEntry's Key."); - - while (((*Itr).second.size() > MapEntry.second)) - (*Itr).second.pop_front(); - // If list goes empty remove entry from LEAs Map. - if ((*Itr).second.empty()) - LEAs.erase(Itr); - } - Stack.pop_back(); -} - -bool FactorizeLEAOpt::containsPhyReg(MachineInstr *MI, unsigned RecLevel) { - if (!MI || !RecLevel) - return false; - - MachineRegisterInfo *MRI = MI->getRegInfo(); - for (auto Operand : MI->operands()) { - if (!Operand.isReg()) - continue; - if (TargetRegisterInfo::isPhysicalRegister(Operand.getReg())) - return true; - MachineInstr *OperDefMI = MRI->getVRegDef(Operand.getReg()); - if (OperDefMI && (MI != OperDefMI) && OperDefMI->isCopyLike() && - containsPhyReg(OperDefMI, RecLevel - 1)) - return true; - } - return false; -} - -void FactorizeLEAOpt::insertLEA(MachineInstr *MI) { - unsigned lsize; - if (containsPhyReg(MI, 2)) - return; - - // Factorization is beneficial only for complex LEAs. - MachineOperand &Base = MI->getOperand(1); - MachineOperand &Index = MI->getOperand(3); - MachineOperand &Offset = MI->getOperand(4); - if ((Offset.isImm() && !Offset.getImm()) || - (!Base.isReg() || !Base.getReg()) || (!Index.isReg() || !Index.getReg())) - return; - - MemOpKey Key = getMemOpCSEKey(*MI, 1); - ScopeEntryT *TopScope = getTopScope(); - - LEAMapT::iterator Itr = LEAs.find(Key); - if (Itr == LEAs.end()) { - lsize = 0; - LEAs[Key].push_front(MI); - } else { - lsize = (*Itr).second.size(); - (*Itr).second.push_front(MI); - } - if (TopScope->second.find(Key) == TopScope->second.end()) - TopScope->second[Key] = lsize; -} - class OptimizeLEAPass : public MachineFunctionPass { public: OptimizeLEAPass() : MachineFunctionPass(ID) {} @@ -477,12 +247,6 @@ public: /// been calculated by LEA. Also, remove redundant LEAs. bool runOnMachineFunction(MachineFunction &MF) override; - void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.setPreservesCFG(); - MachineFunctionPass::getAnalysisUsage(AU); - AU.addRequired(); - } - private: using MemOpMap = DenseMap>; @@ -528,24 +292,8 @@ private: /// \brief Removes LEAs which calculate similar addresses. bool removeRedundantLEAs(MemOpMap &LEAs); - /// \brief Visit over basic blocks, collect LEAs in a scoped - /// hash map (FactorizeLEAOpt::LEAs) and try to factor them out. - bool FactorizeLEAsAllBasicBlocks(MachineFunction &MF); - - bool FactorizeLEAsBasicBlock(MachineDomTreeNode *DN); - - /// \brief Factor out LEAs which share Base,Index,Offset and Segment. - bool processBasicBlock(const MachineBasicBlock &MBB); - - /// \brief Try to replace LEA with a lower strength instruction - /// to improves latency and throughput. - bool strengthReduceLEAs(MemOpMap &LEAs, const MachineBasicBlock &MBB); - DenseMap InstrPos; - FactorizeLEAOpt FactorOpt; - - MachineDominatorTree *DT; MachineRegisterInfo *MRI; const X86InstrInfo *TII; const X86RegisterInfo *TRI; @@ -741,9 +489,7 @@ void OptimizeLEAPass::findLEAs(const MachineBasicBlock &MBB, MemOpMap &LEAs) { bool OptimizeLEAPass::removeRedundantAddrCalc(MemOpMap &LEAs) { bool Changed = false; - if (LEAs.empty()) - return Changed; - + assert(!LEAs.empty()); MachineBasicBlock *MBB = (*LEAs.begin()->second.begin())->getParent(); // Process all instructions in basic block. @@ -913,10 +659,6 @@ bool OptimizeLEAPass::removeRedundantLEAs(MemOpMap &LEAs) { // Erase removed LEA from the list. I2 = List.erase(I2); - // If List becomes empty remove it from LEAs map. - if (List.empty()) - LEAs.erase(E.first); - Changed = true; } ++I1; @@ -926,217 +668,6 @@ bool OptimizeLEAPass::removeRedundantLEAs(MemOpMap &LEAs) { return Changed; } -static inline int getADDrrFromLEA(int LEAOpcode) { - switch (LEAOpcode) { - default: - llvm_unreachable("Unexpected LEA instruction"); - case X86::LEA16r: - return X86::ADD16rr; - case X86::LEA32r: - return X86::ADD32rr; - case X86::LEA64_32r: - case X86::LEA64r: - return X86::ADD64rr; - } -} - -bool OptimizeLEAPass::strengthReduceLEAs(MemOpMap &LEAs, - const MachineBasicBlock &BB) { - bool Changed = false; - - // Loop over all entries in the table. - for (auto &E : LEAs) { - auto &List = E.second; - - // Loop over all LEA pairs. - auto I1 = List.begin(); - while (I1 != List.end()) { - MachineInstrBuilder NewMI; - MachineInstr &First = **I1; - MachineOperand &Res = First.getOperand(0); - MachineOperand &Base = First.getOperand(1); - MachineOperand &Scale = First.getOperand(2); - MachineOperand &Index = First.getOperand(3); - MachineOperand &Offset = First.getOperand(4); - - const MCInstrDesc &ADDrr = TII->get(getADDrrFromLEA(First.getOpcode())); - const DebugLoc DL = First.getDebugLoc(); - - if (!Base.isReg() || !Index.isReg() || !Index.getReg()) { - I1++; - continue; - } - - if (TargetRegisterInfo::isPhysicalRegister(Res.getReg()) || - TargetRegisterInfo::isPhysicalRegister(Base.getReg()) || - TargetRegisterInfo::isPhysicalRegister(Index.getReg())) { - I1++; - continue; - } - - // Check for register class compatibility between Result and - // Index operands. - const TargetRegisterClass *ResRC = MRI->getRegClass(Res.getReg()); - const TargetRegisterClass *IndexRC = MRI->getRegClass(Index.getReg()); - if (TRI->getRegSizeInBits(*ResRC) != TRI->getRegSizeInBits(*IndexRC)) { - I1++; - continue; - } - - MachineBasicBlock &MBB = *(const_cast(&BB)); - // R = B + I - if (Scale.isImm() && Scale.getImm() == 1 && Offset.isImm() && - !Offset.getImm()) { - NewMI = BuildMI(MBB, &First, DL, ADDrr) - .addDef(Res.getReg()) - .addUse(Base.getReg()) - .addUse(Index.getReg()); - Changed = NewMI.getInstr() != nullptr; - First.eraseFromParent(); - I1 = List.erase(I1); - - // If List becomes empty remove it from LEAs map. - if (List.empty()) - LEAs.erase(E.first); - } else - I1++; - } - } - return Changed; -} - -bool OptimizeLEAPass::processBasicBlock(const MachineBasicBlock &MBB) { - bool cseDone = false; - - // Legal scale value (1,2,4 & 8) vector. - auto LegalScale = [](int scale) { - return scale == 1 || scale == 2 || scale == 4 || scale == 8; - }; - - auto CompareFn = [](const MachineInstr *Arg1, - const MachineInstr *Arg2) -> bool { - return Arg1->getOperand(2).getImm() >= Arg2->getOperand(2).getImm(); - }; - - // Loop over all entries in the table. - for (auto &E : FactorOpt.getLEAMap()) { - auto &List = E.second; - if (List.size() > 1) - List.sort(CompareFn); - - // Loop over all LEA pairs. - for (auto Iter1 = List.begin(); Iter1 != List.end(); Iter1++) { - for (auto Iter2 = std::next(Iter1); Iter2 != List.end(); Iter2++) { - MachineInstr &LI1 = **Iter1; - MachineInstr &LI2 = **Iter2; - - if (!DT->dominates(&LI2, &LI1)) - continue; - - int Scale1 = LI1.getOperand(2).getImm(); - int Scale2 = LI2.getOperand(2).getImm(); - assert(LI2.getOperand(0).isReg() && "Result is a VirtualReg"); - DebugLoc DL = LI1.getDebugLoc(); - - // Continue if instruction has already been factorized. - if (FactorOpt.checkIfScheduledForRemoval(&LI1)) - continue; - - int Factor = Scale1 - Scale2; - if (Factor > 0 && LegalScale(Factor)) { - MachineOperand NewBase = LI2.getOperand(0); - MachineOperand NewIndex = LI1.getOperand(3); - - const TargetRegisterClass *LI2ResRC = - MRI->getRegClass(LI2.getOperand(0).getReg()); - const TargetRegisterClass *LI1BaseRC = - MRI->getRegClass(LI1.getOperand(1).getReg()); - - if (TRI->getRegSizeInBits(*LI1BaseRC) > - TRI->getRegSizeInBits(*LI2ResRC)) { - MachineInstr *LI1IndexDef = - MRI->getVRegDef(LI1.getOperand(3).getReg()); - if (LI1IndexDef->getOpcode() != TargetOpcode::SUBREG_TO_REG) - continue; - MachineOperand &SubReg = LI1IndexDef->getOperand(2); - const TargetRegisterClass *SubRegRC = - MRI->getRegClass(SubReg.getReg()); - if (TRI->getRegSizeInBits(*SubRegRC) != - TRI->getRegSizeInBits(*LI2ResRC)) - continue; - NewIndex = SubReg; - } - - DEBUG(dbgs() << "CSE LEAs: Candidate to replace: "; LI1.dump();); - MachineInstrBuilder NewMI = - BuildMI(*(const_cast(&MBB)), &LI1, DL, - TII->get(LI1.getOpcode())) - .addDef(LI1.getOperand(0).getReg()) // Dst = Dst of LI1. - .addUse(NewBase.getReg()) // Base = Dst to LI2. - .addImm(Factor) // Scale = Diff b/w scales. - .addUse(NewIndex.getReg()) // Index = Index of LI1. - .addImm(0) // Disp = 0 - .addUse( - LI1.getOperand(5).getReg()); // Segment = Segmant of LI1. - - cseDone = NewMI.getInstr() != nullptr; - - /// To preserve the SSA nature we need to remove Def flag - /// from old result. - LI1.getOperand(0).setIsDef(false); - - /// Lazy removal shall ensure that replaced LEA remains - /// till we finish processing all the basic block. This shall - /// provide opportunity for further factorization based on - /// the replaced LEA which will be legal since it has same - /// destination as newly formed LEA. - FactorOpt.addForLazyRemoval(&LI1); - - NumFactoredLEAs++; - DEBUG(dbgs() << "CSE LEAs: Replaced by: "; NewMI->dump();); - } - } - } - } - return cseDone; -} - -bool OptimizeLEAPass::FactorizeLEAsBasicBlock(MachineDomTreeNode *DN) { - bool Changed = false; - using StackT = SmallVector; - using VisitedNodeMapT = SmallSet; - - StackT WorkList; - VisitedNodeMapT DomNodeMap; - - WorkList.push_back(DN); - while (!WorkList.empty()) { - MachineDomTreeNode *MDN = WorkList.back(); - FactorOpt.collectDataForBasicBlock(MDN->getBlock()); - Changed |= processBasicBlock(*MDN->getBlock()); - - if (DomNodeMap.find(MDN) == DomNodeMap.end()) { - DomNodeMap.insert(MDN); - for (auto Child : MDN->getChildren()) - WorkList.push_back(Child); - } - - MachineDomTreeNode *TDM = WorkList.back(); - if (MDN->getLevel() == TDM->getLevel()) { - FactorOpt.removeDataForBasicBlock(); - DomNodeMap.erase(MDN); - WorkList.pop_back(); - } - } - return Changed; -} - -bool OptimizeLEAPass::FactorizeLEAsAllBasicBlocks(MachineFunction &MF) { - bool Changed = FactorizeLEAsBasicBlock(DT->getRootNode()); - FactorOpt.performCleanup(); - return Changed; -} - bool OptimizeLEAPass::runOnMachineFunction(MachineFunction &MF) { bool Changed = false; @@ -1146,10 +677,6 @@ bool OptimizeLEAPass::runOnMachineFunction(MachineFunction &MF) { MRI = &MF.getRegInfo(); TII = MF.getSubtarget().getInstrInfo(); TRI = MF.getSubtarget().getRegisterInfo(); - DT = &getAnalysis(); - - // Attempt factorizing LEAs. - Changed |= FactorizeLEAsAllBasicBlocks(MF); // Process all basic blocks. for (auto &MBB : MF) { @@ -1166,9 +693,6 @@ bool OptimizeLEAPass::runOnMachineFunction(MachineFunction &MF) { // Remove redundant LEA instructions. Changed |= removeRedundantLEAs(LEAs); - // Strength reduce LEA instructions. - Changed |= strengthReduceLEAs(LEAs, MBB); - // Remove redundant address calculations. Do it only for -Os/-Oz since only // a code size gain is expected from this part of the pass. if (MF.getFunction()->optForSize()) diff --git a/llvm/test/CodeGen/X86/GlobalISel/callingconv.ll b/llvm/test/CodeGen/X86/GlobalISel/callingconv.ll index a55ff862e2b8..4100a7217ac3 100644 --- a/llvm/test/CodeGen/X86/GlobalISel/callingconv.ll +++ b/llvm/test/CodeGen/X86/GlobalISel/callingconv.ll @@ -388,7 +388,7 @@ define void @test_variadic_call_2(i8** %addr_ptr, double* %val_ptr) { ; X32-NEXT: movl 4(%ecx), %ecx ; X32-NEXT: movl %eax, (%esp) ; X32-NEXT: movl $4, %eax -; X32-NEXT: addl %esp, %eax +; X32-NEXT: leal (%esp,%eax), %eax ; X32-NEXT: movl %edx, 4(%esp) ; X32-NEXT: movl %ecx, 4(%eax) ; X32-NEXT: calll variadic_callee diff --git a/llvm/test/CodeGen/X86/GlobalISel/gep.ll b/llvm/test/CodeGen/X86/GlobalISel/gep.ll index 95ad8d4eb30e..ee66accc77d1 100644 --- a/llvm/test/CodeGen/X86/GlobalISel/gep.ll +++ b/llvm/test/CodeGen/X86/GlobalISel/gep.ll @@ -5,10 +5,10 @@ define i32* @test_gep_i8(i32 *%arr, i8 %ind) { ; X64_GISEL-LABEL: test_gep_i8: ; X64_GISEL: # BB#0: -; X64_GISEL-NEXT: movq $4, %rcx -; X64_GISEL-NEXT: movsbq %sil, %rax -; X64_GISEL-NEXT: imulq %rcx, %rax -; X64_GISEL-NEXT: addq %rdi, %rax +; X64_GISEL-NEXT: movq $4, %rax +; X64_GISEL-NEXT: movsbq %sil, %rcx +; X64_GISEL-NEXT: imulq %rax, %rcx +; X64_GISEL-NEXT: leaq (%rdi,%rcx), %rax ; X64_GISEL-NEXT: retq ; ; X64-LABEL: test_gep_i8: @@ -25,7 +25,7 @@ define i32* @test_gep_i8_const(i32 *%arr) { ; X64_GISEL-LABEL: test_gep_i8_const: ; X64_GISEL: # BB#0: ; X64_GISEL-NEXT: movq $80, %rax -; X64_GISEL-NEXT: addq %rdi, %rax +; X64_GISEL-NEXT: leaq (%rdi,%rax), %rax ; X64_GISEL-NEXT: retq ; ; X64-LABEL: test_gep_i8_const: @@ -39,10 +39,10 @@ define i32* @test_gep_i8_const(i32 *%arr) { define i32* @test_gep_i16(i32 *%arr, i16 %ind) { ; X64_GISEL-LABEL: test_gep_i16: ; X64_GISEL: # BB#0: -; X64_GISEL-NEXT: movq $4, %rcx -; X64_GISEL-NEXT: movswq %si, %rax -; X64_GISEL-NEXT: imulq %rcx, %rax -; X64_GISEL-NEXT: addq %rdi, %rax +; X64_GISEL-NEXT: movq $4, %rax +; X64_GISEL-NEXT: movswq %si, %rcx +; X64_GISEL-NEXT: imulq %rax, %rcx +; X64_GISEL-NEXT: leaq (%rdi,%rcx), %rax ; X64_GISEL-NEXT: retq ; ; X64-LABEL: test_gep_i16: @@ -59,7 +59,7 @@ define i32* @test_gep_i16_const(i32 *%arr) { ; X64_GISEL-LABEL: test_gep_i16_const: ; X64_GISEL: # BB#0: ; X64_GISEL-NEXT: movq $80, %rax -; X64_GISEL-NEXT: addq %rdi, %rax +; X64_GISEL-NEXT: leaq (%rdi,%rax), %rax ; X64_GISEL-NEXT: retq ; ; X64-LABEL: test_gep_i16_const: @@ -73,10 +73,10 @@ define i32* @test_gep_i16_const(i32 *%arr) { define i32* @test_gep_i32(i32 *%arr, i32 %ind) { ; X64_GISEL-LABEL: test_gep_i32: ; X64_GISEL: # BB#0: -; X64_GISEL-NEXT: movq $4, %rcx -; X64_GISEL-NEXT: movslq %esi, %rax -; X64_GISEL-NEXT: imulq %rcx, %rax -; X64_GISEL-NEXT: addq %rdi, %rax +; X64_GISEL-NEXT: movq $4, %rax +; X64_GISEL-NEXT: movslq %esi, %rcx +; X64_GISEL-NEXT: imulq %rax, %rcx +; X64_GISEL-NEXT: leaq (%rdi,%rcx), %rax ; X64_GISEL-NEXT: retq ; ; X64-LABEL: test_gep_i32: @@ -92,7 +92,7 @@ define i32* @test_gep_i32_const(i32 *%arr) { ; X64_GISEL-LABEL: test_gep_i32_const: ; X64_GISEL: # BB#0: ; X64_GISEL-NEXT: movq $20, %rax -; X64_GISEL-NEXT: addq %rdi, %rax +; X64_GISEL-NEXT: leaq (%rdi,%rax), %rax ; X64_GISEL-NEXT: retq ; ; X64-LABEL: test_gep_i32_const: @@ -108,7 +108,7 @@ define i32* @test_gep_i64(i32 *%arr, i64 %ind) { ; X64_GISEL: # BB#0: ; X64_GISEL-NEXT: movq $4, %rax ; X64_GISEL-NEXT: imulq %rsi, %rax -; X64_GISEL-NEXT: addq %rdi, %rax +; X64_GISEL-NEXT: leaq (%rdi,%rax), %rax ; X64_GISEL-NEXT: retq ; ; X64-LABEL: test_gep_i64: @@ -123,7 +123,7 @@ define i32* @test_gep_i64_const(i32 *%arr) { ; X64_GISEL-LABEL: test_gep_i64_const: ; X64_GISEL: # BB#0: ; X64_GISEL-NEXT: movq $20, %rax -; X64_GISEL-NEXT: addq %rdi, %rax +; X64_GISEL-NEXT: leaq (%rdi,%rax), %rax ; X64_GISEL-NEXT: retq ; ; X64-LABEL: test_gep_i64_const: diff --git a/llvm/test/CodeGen/X86/GlobalISel/memop-scalar.ll b/llvm/test/CodeGen/X86/GlobalISel/memop-scalar.ll index c73a43177e3b..2097a3b0bfc9 100644 --- a/llvm/test/CodeGen/X86/GlobalISel/memop-scalar.ll +++ b/llvm/test/CodeGen/X86/GlobalISel/memop-scalar.ll @@ -181,7 +181,7 @@ define i32 @test_gep_folding_largeGepIndex(i32* %arr, i32 %val) { ; ALL-LABEL: test_gep_folding_largeGepIndex: ; ALL: # BB#0: ; ALL-NEXT: movabsq $228719476720, %rax # imm = 0x3540BE3FF0 -; ALL-NEXT: addq %rdi, %rax +; ALL-NEXT: leaq (%rdi,%rax), %rax ; ALL-NEXT: movl %esi, (%rax) ; ALL-NEXT: movl (%rax), %eax ; ALL-NEXT: retq diff --git a/llvm/test/CodeGen/X86/lea-opt-cse1.ll b/llvm/test/CodeGen/X86/lea-opt-cse1.ll index 512740ce0751..05b47690e819 100644 --- a/llvm/test/CodeGen/X86/lea-opt-cse1.ll +++ b/llvm/test/CodeGen/X86/lea-opt-cse1.ll @@ -9,21 +9,27 @@ define void @test_func(%struct.SA* nocapture %ctx, i32 %n) local_unnamed_addr { ; X64: # BB#0: # %entry ; X64-NEXT: movl (%rdi), %eax ; X64-NEXT: movl 16(%rdi), %ecx +; X64-NEXT: leal (%rax,%rcx), %edx ; X64-NEXT: leal 1(%rax,%rcx), %eax ; X64-NEXT: movl %eax, 12(%rdi) -; X64-NEXT: addq %ecx, %eax +; X64-NEXT: leal 1(%rcx,%rdx), %eax ; X64-NEXT: movl %eax, 16(%rdi) ; X64-NEXT: retq ; ; X86-LABEL: test_func: ; X86: # BB#0: # %entry +; X86-NEXT: pushl %esi +; X86-NEXT: .cfi_def_cfa_offset 8 +; X86-NEXT: .cfi_offset %esi, -8 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl (%eax), %ecx ; X86-NEXT: movl 16(%eax), %edx -; X86-NEXT: leal 1(%ecx,%edx), %ecx -; X86-NEXT: movl %ecx, 12(%eax) +; X86-NEXT: leal 1(%ecx,%edx), %esi ; X86-NEXT: addl %edx, %ecx +; X86-NEXT: movl %esi, 12(%eax) +; X86-NEXT: leal 1(%edx,%ecx), %ecx ; X86-NEXT: movl %ecx, 16(%eax) +; X86-NEXT: popl %esi ; X86-NEXT: retl entry: %h0 = getelementptr inbounds %struct.SA, %struct.SA* %ctx, i64 0, i32 0 diff --git a/llvm/test/CodeGen/X86/lea-opt-cse2.ll b/llvm/test/CodeGen/X86/lea-opt-cse2.ll index 4226e3d25c8e..865dd49a6e1f 100644 --- a/llvm/test/CodeGen/X86/lea-opt-cse2.ll +++ b/llvm/test/CodeGen/X86/lea-opt-cse2.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+slow-3ops-lea | FileCheck %s -check-prefix=X64 -; RUN: llc < %s -mtriple=i686-unknown -mattr=+slow-3ops-lea | FileCheck %s -check-prefix=X86 +; RUN: llc < %s -mtriple=x86_64-unknown | FileCheck %s -check-prefix=X64 +; RUN: llc < %s -mtriple=i686-unknown | FileCheck %s -check-prefix=X86 %struct.SA = type { i32 , i32 , i32 , i32 , i32}; @@ -10,39 +10,43 @@ define void @foo(%struct.SA* nocapture %ctx, i32 %n) local_unnamed_addr #0 { ; X64-NEXT: .p2align 4, 0x90 ; X64-NEXT: .LBB0_1: # %loop ; X64-NEXT: # =>This Inner Loop Header: Depth=1 -; X64-NEXT: movl 16(%rdi), %eax -; X64-NEXT: movl (%rdi), %ecx -; X64-NEXT: addl %eax, %ecx -; X64-NEXT: incl %ecx -; X64-NEXT: movl %ecx, 12(%rdi) +; X64-NEXT: movl (%rdi), %eax +; X64-NEXT: movl 16(%rdi), %ecx +; X64-NEXT: leal 1(%rax,%rcx), %edx +; X64-NEXT: movl %edx, 12(%rdi) ; X64-NEXT: decl %esi ; X64-NEXT: jne .LBB0_1 ; X64-NEXT: # BB#2: # %exit -; X64-NEXT: addl %eax, %ecx -; X64-NEXT: movl %ecx, 16(%rdi) +; X64-NEXT: addl %ecx, %eax +; X64-NEXT: leal 1(%rcx,%rax), %eax +; X64-NEXT: movl %eax, 16(%rdi) ; X64-NEXT: retq ; ; X86-LABEL: foo: ; X86: # BB#0: # %entry -; X86-NEXT: pushl %esi +; X86-NEXT: pushl %edi ; X86-NEXT: .cfi_def_cfa_offset 8 -; X86-NEXT: .cfi_offset %esi, -8 +; X86-NEXT: pushl %esi +; X86-NEXT: .cfi_def_cfa_offset 12 +; X86-NEXT: .cfi_offset %esi, -12 +; X86-NEXT: .cfi_offset %edi, -8 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: .p2align 4, 0x90 ; X86-NEXT: .LBB0_1: # %loop ; X86-NEXT: # =>This Inner Loop Header: Depth=1 -; X86-NEXT: movl 16(%eax), %edx -; X86-NEXT: movl (%eax), %esi -; X86-NEXT: addl %edx, %esi -; X86-NEXT: incl %esi -; X86-NEXT: movl %esi, 12(%eax) +; X86-NEXT: movl (%eax), %edx +; X86-NEXT: movl 16(%eax), %esi +; X86-NEXT: leal 1(%edx,%esi), %edi +; X86-NEXT: movl %edi, 12(%eax) ; X86-NEXT: decl %ecx ; X86-NEXT: jne .LBB0_1 ; X86-NEXT: # BB#2: # %exit -; X86-NEXT: addl %edx, %esi -; X86-NEXT: movl %esi, 16(%eax) +; X86-NEXT: addl %esi, %edx +; X86-NEXT: leal 1(%esi,%edx), %ecx +; X86-NEXT: movl %ecx, 16(%eax) ; X86-NEXT: popl %esi +; X86-NEXT: popl %edi ; X86-NEXT: retl entry: br label %loop diff --git a/llvm/test/CodeGen/X86/lea-opt-cse3.ll b/llvm/test/CodeGen/X86/lea-opt-cse3.ll index 57c20cceed16..48ab3130bf0a 100644 --- a/llvm/test/CodeGen/X86/lea-opt-cse3.ll +++ b/llvm/test/CodeGen/X86/lea-opt-cse3.ll @@ -8,7 +8,7 @@ define i32 @foo(i32 %a, i32 %b) local_unnamed_addr #0 { ; X64-NEXT: # kill: %esi %esi %rsi ; X64-NEXT: # kill: %edi %edi %rdi ; X64-NEXT: leal 4(%rdi,%rsi,2), %ecx -; X64-NEXT: leal (%ecx,%esi,2), %eax +; X64-NEXT: leal 4(%rdi,%rsi,4), %eax ; X64-NEXT: imull %ecx, %eax ; X64-NEXT: retq ; @@ -16,9 +16,9 @@ define i32 @foo(i32 %a, i32 %b) local_unnamed_addr #0 { ; X86: # BB#0: # %entry ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: leal 4(%ecx,%eax,2), %ecx -; X86-NEXT: leal (%ecx,%eax,2), %eax -; X86-NEXT: imull %ecx, %eax +; X86-NEXT: leal 4(%ecx,%eax,2), %edx +; X86-NEXT: leal 4(%ecx,%eax,4), %eax +; X86-NEXT: imull %edx, %eax ; X86-NEXT: retl entry: %mul = shl i32 %b, 1 @@ -36,7 +36,7 @@ define i32 @foo1(i32 %a, i32 %b) local_unnamed_addr #0 { ; X64-NEXT: # kill: %esi %esi %rsi ; X64-NEXT: # kill: %edi %edi %rdi ; X64-NEXT: leal 4(%rdi,%rsi,4), %ecx -; X64-NEXT: leal (%ecx,%esi,4), %eax +; X64-NEXT: leal 4(%rdi,%rsi,8), %eax ; X64-NEXT: imull %ecx, %eax ; X64-NEXT: retq ; @@ -44,9 +44,9 @@ define i32 @foo1(i32 %a, i32 %b) local_unnamed_addr #0 { ; X86: # BB#0: # %entry ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: leal 4(%ecx,%eax,4), %ecx -; X86-NEXT: leal (%ecx,%eax,4), %eax -; X86-NEXT: imull %ecx, %eax +; X86-NEXT: leal 4(%ecx,%eax,4), %edx +; X86-NEXT: leal 4(%ecx,%eax,8), %eax +; X86-NEXT: imull %edx, %eax ; X86-NEXT: retl entry: %mul = shl i32 %b, 2 @@ -68,23 +68,29 @@ define i32 @foo1_mult_basic_blocks(i32 %a, i32 %b) local_unnamed_addr #0 { ; X64-NEXT: cmpl $10, %ecx ; X64-NEXT: je .LBB2_2 ; X64-NEXT: # BB#1: # %mid -; X64-NEXT: leal (%ecx,%esi,4), %eax -; X64-NEXT: imull %ecx, %eax +; X64-NEXT: leal 4(%rdi,%rsi,8), %eax +; X64-NEXT: imull %eax, %ecx +; X64-NEXT: movl %ecx, %eax ; X64-NEXT: .LBB2_2: # %exit ; X64-NEXT: retq ; ; X86-LABEL: foo1_mult_basic_blocks: ; X86: # BB#0: # %entry +; X86-NEXT: pushl %esi +; X86-NEXT: .cfi_def_cfa_offset 8 +; X86-NEXT: .cfi_offset %esi, -8 ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: leal 4(%eax,%edx,4), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: leal 4(%esi,%edx,4), %ecx ; X86-NEXT: xorl %eax, %eax ; X86-NEXT: cmpl $10, %ecx ; X86-NEXT: je .LBB2_2 ; X86-NEXT: # BB#1: # %mid -; X86-NEXT: leal (%ecx,%edx,4), %eax -; X86-NEXT: imull %ecx, %eax +; X86-NEXT: leal 4(%esi,%edx,8), %eax +; X86-NEXT: imull %eax, %ecx +; X86-NEXT: movl %ecx, %eax ; X86-NEXT: .LBB2_2: # %exit +; X86-NEXT: popl %esi ; X86-NEXT: retl entry: %mul = shl i32 %b, 2 diff --git a/llvm/test/CodeGen/X86/lea-opt-cse4.ll b/llvm/test/CodeGen/X86/lea-opt-cse4.ll index 6f1fe282f92e..31f31a73d44e 100644 --- a/llvm/test/CodeGen/X86/lea-opt-cse4.ll +++ b/llvm/test/CodeGen/X86/lea-opt-cse4.ll @@ -1,31 +1,41 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+slow-3ops-lea | FileCheck %s -check-prefix=X64 -; RUN: llc < %s -mtriple=i686-unknown -mattr=+slow-3ops-lea | FileCheck %s -check-prefix=X86 +; RUN: llc < %s -mtriple=x86_64-unknown | FileCheck %s -check-prefix=X64 +; RUN: llc < %s -mtriple=i686-unknown | FileCheck %s -check-prefix=X86 %struct.SA = type { i32 , i32 , i32 , i32 , i32}; define void @foo(%struct.SA* nocapture %ctx, i32 %n) local_unnamed_addr #0 { ; X64-LABEL: foo: ; X64: # BB#0: # %entry -; X64-NEXT: movl (%rdi), %eax -; X64-NEXT: movl 16(%rdi), %ecx -; X64-NEXT: leal (%rax,%rcx,4), %eax -; X64-NEXT: addl $1, %eax -; X64-NEXT: movl %eax, 12(%rdi) -; X64-NEXT: addl %ecx, %eax +; X64-NEXT: movl 16(%rdi), %eax +; X64-NEXT: movl (%rdi), %ecx +; X64-NEXT: addl %eax, %ecx +; X64-NEXT: addl %eax, %ecx +; X64-NEXT: addl %eax, %ecx +; X64-NEXT: leal (%rcx,%rax), %edx +; X64-NEXT: leal 1(%rax,%rcx), %ecx +; X64-NEXT: movl %ecx, 12(%rdi) +; X64-NEXT: leal 1(%rax,%rdx), %eax ; X64-NEXT: movl %eax, 16(%rdi) ; X64-NEXT: retq ; ; X86-LABEL: foo: ; X86: # BB#0: # %entry +; X86-NEXT: pushl %esi +; X86-NEXT: .cfi_def_cfa_offset 8 +; X86-NEXT: .cfi_offset %esi, -8 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl (%eax), %ecx -; X86-NEXT: movl 16(%eax), %edx -; X86-NEXT: leal (%ecx,%edx,4), %ecx -; X86-NEXT: addl $1, %ecx -; X86-NEXT: movl %ecx, 12(%eax) -; X86-NEXT: addl %edx, %ecx +; X86-NEXT: movl 16(%eax), %ecx +; X86-NEXT: movl (%eax), %edx +; X86-NEXT: addl %ecx, %edx +; X86-NEXT: addl %ecx, %edx +; X86-NEXT: addl %ecx, %edx +; X86-NEXT: leal 1(%ecx,%edx), %esi +; X86-NEXT: addl %ecx, %edx +; X86-NEXT: movl %esi, 12(%eax) +; X86-NEXT: leal 1(%ecx,%edx), %ecx ; X86-NEXT: movl %ecx, 16(%eax) +; X86-NEXT: popl %esi ; X86-NEXT: retl entry: %h0 = getelementptr inbounds %struct.SA, %struct.SA* %ctx, i64 0, i32 0 @@ -52,15 +62,15 @@ define void @foo_loop(%struct.SA* nocapture %ctx, i32 %n) local_unnamed_addr #0 ; X64-NEXT: .p2align 4, 0x90 ; X64-NEXT: .LBB1_1: # %loop ; X64-NEXT: # =>This Inner Loop Header: Depth=1 -; X64-NEXT: movl 16(%rdi), %eax ; X64-NEXT: movl (%rdi), %ecx -; X64-NEXT: addl %eax, %ecx -; X64-NEXT: incl %ecx -; X64-NEXT: movl %ecx, 12(%rdi) +; X64-NEXT: movl 16(%rdi), %eax +; X64-NEXT: leal 1(%rcx,%rax), %edx +; X64-NEXT: movl %edx, 12(%rdi) ; X64-NEXT: decl %esi ; X64-NEXT: jne .LBB1_1 ; X64-NEXT: # BB#2: # %exit ; X64-NEXT: addl %eax, %ecx +; X64-NEXT: leal 1(%rax,%rcx), %ecx ; X64-NEXT: addl %eax, %ecx ; X64-NEXT: addl %eax, %ecx ; X64-NEXT: addl %eax, %ecx @@ -72,23 +82,26 @@ define void @foo_loop(%struct.SA* nocapture %ctx, i32 %n) local_unnamed_addr #0 ; ; X86-LABEL: foo_loop: ; X86: # BB#0: # %entry -; X86-NEXT: pushl %esi +; X86-NEXT: pushl %edi ; X86-NEXT: .cfi_def_cfa_offset 8 -; X86-NEXT: .cfi_offset %esi, -8 -; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: pushl %esi +; X86-NEXT: .cfi_def_cfa_offset 12 +; X86-NEXT: .cfi_offset %esi, -12 +; X86-NEXT: .cfi_offset %edi, -8 +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: .p2align 4, 0x90 ; X86-NEXT: .LBB1_1: # %loop ; X86-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NEXT: movl (%eax), %esi ; X86-NEXT: movl 16(%eax), %ecx -; X86-NEXT: movl (%eax), %edx -; X86-NEXT: addl %ecx, %edx -; X86-NEXT: incl %edx -; X86-NEXT: movl %edx, 12(%eax) -; X86-NEXT: decl %esi +; X86-NEXT: leal 1(%esi,%ecx), %edi +; X86-NEXT: movl %edi, 12(%eax) +; X86-NEXT: decl %edx ; X86-NEXT: jne .LBB1_1 ; X86-NEXT: # BB#2: # %exit -; X86-NEXT: addl %ecx, %edx +; X86-NEXT: addl %ecx, %esi +; X86-NEXT: leal 1(%ecx,%esi), %edx ; X86-NEXT: addl %ecx, %edx ; X86-NEXT: addl %ecx, %edx ; X86-NEXT: addl %ecx, %edx @@ -97,6 +110,7 @@ define void @foo_loop(%struct.SA* nocapture %ctx, i32 %n) local_unnamed_addr #0 ; X86-NEXT: addl %ecx, %edx ; X86-NEXT: movl %edx, 16(%eax) ; X86-NEXT: popl %esi +; X86-NEXT: popl %edi ; X86-NEXT: retl entry: br label %loop diff --git a/llvm/test/CodeGen/X86/mul-constant-i16.ll b/llvm/test/CodeGen/X86/mul-constant-i16.ll index 72731b3ba675..c3b822ac2145 100644 --- a/llvm/test/CodeGen/X86/mul-constant-i16.ll +++ b/llvm/test/CodeGen/X86/mul-constant-i16.ll @@ -558,10 +558,11 @@ define i16 @test_mul_by_28(i16 %x) { define i16 @test_mul_by_29(i16 %x) { ; X86-LABEL: test_mul_by_29: ; X86: # BB#0: -; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax -; X86-NEXT: leal (%eax,%eax,8), %ecx -; X86-NEXT: leal (%ecx,%ecx,2), %ecx -; X86-NEXT: leal (%ecx,%eax,2), %eax +; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: leal (%ecx,%ecx,8), %eax +; X86-NEXT: leal (%eax,%eax,2), %eax +; X86-NEXT: addl %ecx, %eax +; X86-NEXT: addl %ecx, %eax ; X86-NEXT: # kill: %ax %ax %eax ; X86-NEXT: retl ; @@ -570,7 +571,8 @@ define i16 @test_mul_by_29(i16 %x) { ; X64-NEXT: # kill: %edi %edi %rdi ; X64-NEXT: leal (%rdi,%rdi,8), %eax ; X64-NEXT: leal (%rax,%rax,2), %eax -; X64-NEXT: leal (%rax,%rdi,2), %eax +; X64-NEXT: addl %edi, %eax +; X64-NEXT: addl %edi, %eax ; X64-NEXT: # kill: %ax %ax %eax ; X64-NEXT: retq %mul = mul nsw i16 %x, 29 diff --git a/llvm/test/CodeGen/X86/mul-constant-i32.ll b/llvm/test/CodeGen/X86/mul-constant-i32.ll index c8ac6a70927a..228dd5e5f374 100644 --- a/llvm/test/CodeGen/X86/mul-constant-i32.ll +++ b/llvm/test/CodeGen/X86/mul-constant-i32.ll @@ -1457,10 +1457,11 @@ define i32 @test_mul_by_28(i32 %x) { define i32 @test_mul_by_29(i32 %x) { ; X86-LABEL: test_mul_by_29: ; X86: # BB#0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: leal (%eax,%eax,8), %ecx -; X86-NEXT: leal (%ecx,%ecx,2), %ecx -; X86-NEXT: leal (%ecx,%eax,2), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: leal (%ecx,%ecx,8), %eax +; X86-NEXT: leal (%eax,%eax,2), %eax +; X86-NEXT: addl %ecx, %eax +; X86-NEXT: addl %ecx, %eax ; X86-NEXT: retl ; ; X64-HSW-LABEL: test_mul_by_29: @@ -1468,7 +1469,8 @@ define i32 @test_mul_by_29(i32 %x) { ; X64-HSW-NEXT: # kill: %edi %edi %rdi ; X64-HSW-NEXT: leal (%rdi,%rdi,8), %eax # sched: [1:0.50] ; X64-HSW-NEXT: leal (%rax,%rax,2), %eax # sched: [1:0.50] -; X64-HSW-NEXT: leal (%rax,%rdi,2), %eax # sched: [1:0.50] +; X64-HSW-NEXT: addl %edi, %eax # sched: [1:0.25] +; X64-HSW-NEXT: addl %edi, %eax # sched: [1:0.25] ; X64-HSW-NEXT: retq # sched: [2:1.00] ; ; X64-JAG-LABEL: test_mul_by_29: @@ -1476,7 +1478,8 @@ define i32 @test_mul_by_29(i32 %x) { ; X64-JAG-NEXT: # kill: %edi %edi %rdi ; X64-JAG-NEXT: leal (%rdi,%rdi,8), %eax # sched: [1:0.50] ; X64-JAG-NEXT: leal (%rax,%rax,2), %eax # sched: [1:0.50] -; X64-JAG-NEXT: leal (%rax,%rdi,2), %eax # sched: [1:0.50] +; X64-JAG-NEXT: addl %edi, %eax # sched: [1:0.50] +; X64-JAG-NEXT: addl %edi, %eax # sched: [1:0.50] ; X64-JAG-NEXT: retq # sched: [4:1.00] ; ; X86-NOOPT-LABEL: test_mul_by_29: diff --git a/llvm/test/CodeGen/X86/mul-constant-i64.ll b/llvm/test/CodeGen/X86/mul-constant-i64.ll index 8e171f973e0a..98568a6fc8e9 100644 --- a/llvm/test/CodeGen/X86/mul-constant-i64.ll +++ b/llvm/test/CodeGen/X86/mul-constant-i64.ll @@ -1523,7 +1523,8 @@ define i64 @test_mul_by_29(i64 %x) { ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: leal (%eax,%eax,8), %ecx ; X86-NEXT: leal (%ecx,%ecx,2), %ecx -; X86-NEXT: leal (%ecx,%eax,2), %ecx +; X86-NEXT: addl %eax, %ecx +; X86-NEXT: addl %eax, %ecx ; X86-NEXT: movl $29, %eax ; X86-NEXT: mull {{[0-9]+}}(%esp) ; X86-NEXT: addl %ecx, %edx @@ -1533,14 +1534,16 @@ define i64 @test_mul_by_29(i64 %x) { ; X64-HSW: # BB#0: ; X64-HSW-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [1:0.50] ; X64-HSW-NEXT: leaq (%rax,%rax,2), %rax # sched: [1:0.50] -; X64-HSW-NEXT: leaq (%rax,%rdi,2), %rax # sched: [1:0.50] +; X64-HSW-NEXT: addq %rdi, %rax # sched: [1:0.25] +; X64-HSW-NEXT: addq %rdi, %rax # sched: [1:0.25] ; X64-HSW-NEXT: retq # sched: [2:1.00] ; ; X64-JAG-LABEL: test_mul_by_29: ; X64-JAG: # BB#0: ; X64-JAG-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [1:0.50] ; X64-JAG-NEXT: leaq (%rax,%rax,2), %rax # sched: [1:0.50] -; X64-JAG-NEXT: leaq (%rax,%rdi,2), %rax # sched: [1:0.50] +; X64-JAG-NEXT: addq %rdi, %rax # sched: [1:0.50] +; X64-JAG-NEXT: addq %rdi, %rax # sched: [1:0.50] ; X64-JAG-NEXT: retq # sched: [4:1.00] ; ; X86-NOOPT-LABEL: test_mul_by_29: diff --git a/llvm/test/CodeGen/X86/mul-constant-result.ll b/llvm/test/CodeGen/X86/mul-constant-result.ll index f5846ab37bab..6e74c1d4e9ee 100644 --- a/llvm/test/CodeGen/X86/mul-constant-result.ll +++ b/llvm/test/CodeGen/X86/mul-constant-result.ll @@ -164,7 +164,8 @@ define i32 @mult(i32, i32) local_unnamed_addr #0 { ; X86-NEXT: .LBB0_35: ; X86-NEXT: leal (%eax,%eax,8), %ecx ; X86-NEXT: leal (%ecx,%ecx,2), %ecx -; X86-NEXT: leal (%ecx,%eax,2), %eax +; X86-NEXT: addl %eax, %ecx +; X86-NEXT: addl %ecx, %eax ; X86-NEXT: popl %esi ; X86-NEXT: retl ; X86-NEXT: .LBB0_36: @@ -322,15 +323,14 @@ define i32 @mult(i32, i32) local_unnamed_addr #0 { ; X64-HSW-NEXT: .LBB0_31: ; X64-HSW-NEXT: leal (%rax,%rax,8), %ecx ; X64-HSW-NEXT: leal (%rcx,%rcx,2), %ecx -; X64-HSW-NEXT: .LBB0_17: -; X64-HSW-NEXT: addl %eax, %ecx -; X64-HSW-NEXT: movl %ecx, %eax -; X64-HSW-NEXT: # kill: %eax %eax %rax -; X64-HSW-NEXT: retq +; X64-HSW-NEXT: jmp .LBB0_17 ; X64-HSW-NEXT: .LBB0_32: ; X64-HSW-NEXT: leal (%rax,%rax,8), %ecx ; X64-HSW-NEXT: leal (%rcx,%rcx,2), %ecx -; X64-HSW-NEXT: leal (%rcx,%rax,2), %eax +; X64-HSW-NEXT: addl %eax, %ecx +; X64-HSW-NEXT: .LBB0_17: +; X64-HSW-NEXT: addl %eax, %ecx +; X64-HSW-NEXT: movl %ecx, %eax ; X64-HSW-NEXT: # kill: %eax %eax %rax ; X64-HSW-NEXT: retq ; X64-HSW-NEXT: .LBB0_33: diff --git a/llvm/test/CodeGen/X86/umul-with-overflow.ll b/llvm/test/CodeGen/X86/umul-with-overflow.ll index 70b7daa6c162..2e877a0b6e0f 100644 --- a/llvm/test/CodeGen/X86/umul-with-overflow.ll +++ b/llvm/test/CodeGen/X86/umul-with-overflow.ll @@ -40,10 +40,10 @@ define i32 @test2(i32 %a, i32 %b) nounwind readnone { ; X64-NEXT: leal (%rdi,%rdi), %eax ; X64-NEXT: retq entry: - %tmp0 = add i32 %b, %a - %tmp1 = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 %tmp0, i32 2) - %tmp2 = extractvalue { i32, i1 } %tmp1, 0 - ret i32 %tmp2 + %tmp0 = add i32 %b, %a + %tmp1 = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 %tmp0, i32 2) + %tmp2 = extractvalue { i32, i1 } %tmp1, 0 + ret i32 %tmp2 } define i32 @test3(i32 %a, i32 %b) nounwind readnone { @@ -64,8 +64,8 @@ define i32 @test3(i32 %a, i32 %b) nounwind readnone { ; X64-NEXT: mull %ecx ; X64-NEXT: retq entry: - %tmp0 = add i32 %b, %a - %tmp1 = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 %tmp0, i32 4) - %tmp2 = extractvalue { i32, i1 } %tmp1, 0 - ret i32 %tmp2 + %tmp0 = add i32 %b, %a + %tmp1 = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 %tmp0, i32 4) + %tmp2 = extractvalue { i32, i1 } %tmp1, 0 + ret i32 %tmp2 } diff --git a/llvm/test/Transforms/LoopStrengthReduce/X86/ivchain-X86.ll b/llvm/test/Transforms/LoopStrengthReduce/X86/ivchain-X86.ll index 3f6d234c9298..7c01432914ff 100644 --- a/llvm/test/Transforms/LoopStrengthReduce/X86/ivchain-X86.ll +++ b/llvm/test/Transforms/LoopStrengthReduce/X86/ivchain-X86.ll @@ -13,14 +13,14 @@ ; X64-NEXT: .p2align ; X64: %loop ; no complex address modes -; X64-NOT: [1-9]+(%{{[^)]+}},%{{[^)]+}}, +; X64-NOT: (%{{[^)]+}},%{{[^)]+}}, ; ; X32: @simple ; no expensive address computation in the preheader ; X32-NOT: imul ; X32: %loop ; no complex address modes -; X32-NOT: [1-9]+(%{{[^)]+}},%{{[^)]+}}, +; X32-NOT: (%{{[^)]+}},%{{[^)]+}}, define i32 @simple(i32* %a, i32* %b, i32 %x) nounwind { entry: br label %loop @@ -103,7 +103,7 @@ exit: ; X32-NOT: mov{{.*}}(%esp){{$}} ; X32: %for.body{{$}} ; no complex address modes -; X32-NOT: [1-9]+(%{{[^)]+}},%{{[^)]+}}, +; X32-NOT: (%{{[^)]+}},%{{[^)]+}}, ; no reloads ; X32-NOT: (%esp) define void @extrastride(i8* nocapture %main, i32 %main_stride, i32* nocapture %res, i32 %x, i32 %y, i32 %z) nounwind {