Add logic to greedy reg alloc to avoid bad eviction chains

This fixes bugzilla 26810
https://bugs.llvm.org/show_bug.cgi?id=26810

This is intended to prevent sequences like:
movl %ebp, 8(%esp) # 4-byte Spill
movl %ecx, %ebp
movl %ebx, %ecx
movl %edi, %ebx
movl %edx, %edi
cltd
idivl %esi
movl %edi, %edx
movl %ebx, %edi
movl %ecx, %ebx
movl %ebp, %ecx
movl 16(%esp), %ebp # 4 - byte Reload

Such sequences are created in 2 scenarios:

Scenario #1:
vreg0 is evicted from physreg0 by vreg1
Evictee vreg0 is intended for region splitting with split candidate physreg0 (the reg vreg0 was evicted from)
Region splitting creates a local interval because of interference with the evictor vreg1 (normally region spliiting creates 2 interval, the "by reg" and "by stack" intervals. Local interval created when interference occurs.)
one of the split intervals ends up evicting vreg2 from physreg1
Evictee vreg2 is intended for region splitting with split candidate physreg1
one of the split intervals ends up evicting vreg3 from physreg2 etc.. until someone spills

Scenario #2
vreg0 is evicted from physreg0 by vreg1
vreg2 is evicted from physreg2 by vreg3 etc
Evictee vreg0 is intended for region splitting with split candidate physreg1
Region splitting creates a local interval because of interference with the evictor vreg1
one of the split intervals ends up evicting back original evictor vreg1 from physreg0 (the reg vreg0 was evicted from)
Another evictee vreg2 is intended for region splitting with split candidate physreg1
one of the split intervals ends up evicting vreg3 from physreg2 etc.. until someone spills

As compile time was a concern, I've added a flag to control weather we do cost calculations for local intervals we expect to be created (it's on by default for X86 target, off for the rest).

Differential Revision: https://reviews.llvm.org/D35816

Change-Id: Id9411ff7bbb845463d289ba2ae97737a1ee7cc39
llvm-svn: 316295
This commit is contained in:
Marina Yatsina 2017-10-22 17:59:38 +00:00
parent dac20263a1
commit f9371d821f
10 changed files with 881 additions and 20 deletions

View File

@ -66,6 +66,32 @@ class VirtRegMap;
/// \brief (re)compute li's spill weight and allocation hint.
void calculateSpillWeightAndHint(LiveInterval &li);
/// \brief Compute future expected spill weight of a split artifact of li
/// that will span between start and end slot indexes.
/// \param li The live interval to be split.
/// \param start The expected begining of the split artifact. Instructions
/// before start will not affect the weight.
/// \param end The expected end of the split artifact. Instructions
/// after end will not affect the weight.
/// \return The expected spill weight of the split artifact. Returns
/// negative weight for unspillable li.
float futureWeight(LiveInterval &li, SlotIndex start, SlotIndex end);
/// \brief Helper function for weight calculations.
/// (Re)compute li's spill weight and allocation hint, or, for non null
/// start and end - compute future expected spill weight of a split
/// artifact of li that will span between start and end slot indexes.
/// \param li The live interval for which to compute the weight.
/// \param start The expected begining of the split artifact. Instructions
/// before start will not affect the weight. Relevant for
/// weight calculation of future split artifact.
/// \param end The expected end of the split artifact. Instructions
/// after end will not affect the weight. Relevant for
/// weight calculation of future split artifact.
/// \return The spill weight. Returns negative weight for unspillable li.
float weightCalcHelper(LiveInterval &li, SlotIndex *start = nullptr,
SlotIndex *end = nullptr);
};
/// \brief Compute spill weights and allocation hints for all virtual register

View File

@ -107,6 +107,11 @@ class VirtRegMap;
const MachineBlockFrequencyInfo *MBFI,
const MachineInstr &Instr);
/// Calculate the spill weight to assign to a single instruction.
static float getSpillWeight(bool isDef, bool isUse,
const MachineBlockFrequencyInfo *MBFI,
const MachineBasicBlock *MBB);
LiveInterval &getInterval(unsigned Reg) {
if (hasInterval(Reg))
return *VirtRegIntervals[Reg];

View File

@ -221,6 +221,11 @@ public:
/// a finer grain to tune the register allocator.
virtual bool enableRALocalReassignment(CodeGenOpt::Level OptLevel) const;
/// \brief True if the subtarget should consider the cost of local intervals
/// created by a split candidate when choosing the best split candidate. This
/// heuristic may be compile time intensive.
virtual bool enableAdvancedRASplitCost() const;
/// \brief Enable use of alias analysis during code generation (during MI
/// scheduling, DAGCombine, etc.).
virtual bool useAA() const;

View File

@ -133,8 +133,21 @@ static bool isRematerializable(const LiveInterval &LI,
return true;
}
void
VirtRegAuxInfo::calculateSpillWeightAndHint(LiveInterval &li) {
void VirtRegAuxInfo::calculateSpillWeightAndHint(LiveInterval &li) {
float weight = weightCalcHelper(li);
// Check if unspillable.
if (weight < 0)
return;
li.weight = weight;
}
float VirtRegAuxInfo::futureWeight(LiveInterval &li, SlotIndex start,
SlotIndex end) {
return weightCalcHelper(li, &start, &end);
}
float VirtRegAuxInfo::weightCalcHelper(LiveInterval &li, SlotIndex *start,
SlotIndex *end) {
MachineRegisterInfo &mri = MF.getRegInfo();
const TargetRegisterInfo &tri = *MF.getSubtarget().getRegisterInfo();
MachineBasicBlock *mbb = nullptr;
@ -154,10 +167,38 @@ VirtRegAuxInfo::calculateSpillWeightAndHint(LiveInterval &li) {
// Don't recompute spill weight for an unspillable register.
bool Spillable = li.isSpillable();
bool localSplitArtifact = start && end;
// Do not update future local split artifacts.
bool updateLI = !localSplitArtifact;
if (localSplitArtifact) {
MachineBasicBlock *localMBB = LIS.getMBBFromIndex(*end);
assert(localMBB == LIS.getMBBFromIndex(*start) &&
"start and end are expected to be in the same basic block");
// Local split artifact will have 2 additional copy instructions and they
// will be in the same BB.
// localLI = COPY other
// ...
// other = COPY localLI
totalWeight += LiveIntervals::getSpillWeight(true, false, &MBFI, localMBB);
totalWeight += LiveIntervals::getSpillWeight(false, true, &MBFI, localMBB);
numInstr += 2;
}
for (MachineRegisterInfo::reg_instr_iterator
I = mri.reg_instr_begin(li.reg), E = mri.reg_instr_end();
I != E; ) {
MachineInstr *mi = &*(I++);
// For local split artifacts, we are interested only in instructions between
// the expected start and end of the range.
SlotIndex si = LIS.getInstructionIndex(*mi);
if (localSplitArtifact && ((si < *start) || (si > *end)))
continue;
numInstr++;
if (mi->isIdentityCopy() || mi->isImplicitDef() || mi->isDebugValue())
continue;
@ -212,23 +253,25 @@ VirtRegAuxInfo::calculateSpillWeightAndHint(LiveInterval &li) {
Hint.clear();
// Always prefer the physreg hint.
if (unsigned hint = hintPhys ? hintPhys : hintVirt) {
mri.setRegAllocationHint(li.reg, 0, hint);
// Weakly boost the spill weight of hinted registers.
totalWeight *= 1.01F;
if (updateLI) {
if (unsigned hint = hintPhys ? hintPhys : hintVirt) {
mri.setRegAllocationHint(li.reg, 0, hint);
// Weakly boost the spill weight of hinted registers.
totalWeight *= 1.01F;
}
}
// If the live interval was already unspillable, leave it that way.
if (!Spillable)
return;
return -1.0;
// Mark li as unspillable if all live ranges are tiny and the interval
// is not live at any reg mask. If the interval is live at a reg mask
// spilling may be required.
if (li.isZeroLength(LIS.getSlotIndexes()) &&
if (updateLI && li.isZeroLength(LIS.getSlotIndexes()) &&
!li.isLiveAtIndexes(LIS.getRegMaskSlots())) {
li.markNotSpillable();
return;
return -1.0;
}
// If all of the definitions of the interval are re-materializable,
@ -238,5 +281,7 @@ VirtRegAuxInfo::calculateSpillWeightAndHint(LiveInterval &li) {
if (isRematerializable(li, LIS, VRM, *MF.getSubtarget().getInstrInfo()))
totalWeight *= 0.5F;
li.weight = normalize(totalWeight, li.getSize(), numInstr);
if (localSplitArtifact)
return normalize(totalWeight, start->distance(*end), numInstr);
return normalize(totalWeight, li.getSize(), numInstr);
}

View File

@ -824,7 +824,13 @@ LiveIntervals::hasPHIKill(const LiveInterval &LI, const VNInfo *VNI) const {
float LiveIntervals::getSpillWeight(bool isDef, bool isUse,
const MachineBlockFrequencyInfo *MBFI,
const MachineInstr &MI) {
BlockFrequency Freq = MBFI->getBlockFreq(MI.getParent());
return getSpillWeight(isDef, isUse, MBFI, MI.getParent());
}
float LiveIntervals::getSpillWeight(bool isDef, bool isUse,
const MachineBlockFrequencyInfo *MBFI,
const MachineBasicBlock *MBB) {
BlockFrequency Freq = MBFI->getBlockFreq(MBB);
const float Scale = 1.0f / MBFI->getEntryFreq();
return (isDef + isUse) * (Freq.getFrequency() * Scale);
}

View File

@ -23,6 +23,7 @@
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/IndexedMap.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
@ -129,6 +130,12 @@ CSRFirstTimeCost("regalloc-csr-first-time-cost",
cl::desc("Cost for first time use of callee-saved register."),
cl::init(0), cl::Hidden);
static cl::opt<bool> ConsiderLocalIntervalCost(
"condsider-local-interval-cost", cl::Hidden,
cl::desc("Consider the cost of local intervals created by a split "
"candidate when choosing the best split candidate."),
cl::init(false));
static RegisterRegAlloc greedyRegAlloc("greedy", "greedy register allocator",
createGreedyRegisterAllocator);
@ -277,6 +284,57 @@ class RAGreedy : public MachineFunctionPass,
}
};
/// EvictionTrack - Keeps track of past evictions in order to optimize region
/// split decision.
class EvictionTrack {
public:
using EvictorInfo =
std::pair<unsigned /* evictor */, unsigned /* physreg */>;
using EvicteeInfo = llvm::MapVector<unsigned /* evictee */, EvictorInfo>;
private:
/// Each Vreg that has been evicted in the last stage of selectOrSplit will
/// be mapped to the evictor Vreg and the PhysReg it was evicted from.
EvicteeInfo Evictees;
public:
/// \brief Clear all eviction information.
void clear() { Evictees.clear(); }
/// \brief Clear eviction information for the given evictee Vreg.
/// E.g. when Vreg get's a new allocation, the old eviction info is no
/// longer relevant.
/// \param Evictee The evictee Vreg for whom we want to clear collected
/// eviction info.
void clearEvicteeInfo(unsigned Evictee) { Evictees.erase(Evictee); }
/// \brief Track new eviction.
/// The Evictor vreg has evicted the Evictee vreg from Physreg.
/// \praram PhysReg The phisical register Evictee was evicted from.
/// \praram Evictor The evictor Vreg that evicted Evictee.
/// \praram Evictee The evictee Vreg.
void addEviction(unsigned PhysReg, unsigned Evictor, unsigned Evictee) {
Evictees[Evictee].first = Evictor;
Evictees[Evictee].second = PhysReg;
}
/// Return the Evictor Vreg which evicted Evictee Vreg from PhysReg.
/// \praram Evictee The evictee vreg.
/// \return The Evictor vreg which evicted Evictee vreg from PhysReg. 0 if
/// nobody has evicted Evictee from PhysReg.
EvictorInfo getEvictor(unsigned Evictee) {
if (Evictees.count(Evictee)) {
return Evictees[Evictee];
}
return EvictorInfo(0, 0);
}
};
// Keeps track of past evictions in order to optimize region split decision.
EvictionTrack LastEvicted;
// splitting state.
std::unique_ptr<SplitAnalysis> SA;
std::unique_ptr<SplitEditor> SE;
@ -340,6 +398,10 @@ class RAGreedy : public MachineFunctionPass,
/// obtained from the TargetSubtargetInfo.
bool EnableLocalReassign;
/// Enable or not the the consideration of the cost of local intervals created
/// by a split candidate when choosing the best split candidate.
bool EnableAdvancedRASplitCost;
/// Set of broken hints that may be reconciled later because of eviction.
SmallSetVector<LiveInterval *, 8> SetOfBrokenHints;
@ -382,13 +444,24 @@ private:
bool addSplitConstraints(InterferenceCache::Cursor, BlockFrequency&);
void addThroughConstraints(InterferenceCache::Cursor, ArrayRef<unsigned>);
void growRegion(GlobalSplitCandidate &Cand);
BlockFrequency calcGlobalSplitCost(GlobalSplitCandidate&);
bool splitCanCauseEvictionChain(unsigned Evictee, GlobalSplitCandidate &Cand,
unsigned BBNumber,
const AllocationOrder &Order);
BlockFrequency calcGlobalSplitCost(GlobalSplitCandidate &,
const AllocationOrder &Order,
bool *CanCauseEvictionChain);
bool calcCompactRegion(GlobalSplitCandidate&);
void splitAroundRegion(LiveRangeEdit&, ArrayRef<unsigned>);
void calcGapWeights(unsigned, SmallVectorImpl<float>&);
unsigned canReassign(LiveInterval &VirtReg, unsigned PhysReg);
bool shouldEvict(LiveInterval &A, bool, LiveInterval &B, bool);
bool canEvictInterference(LiveInterval&, unsigned, bool, EvictionCost&);
bool canEvictInterferenceInRange(LiveInterval &VirtReg, unsigned PhysReg,
SlotIndex Start, SlotIndex End,
EvictionCost &MaxCost);
unsigned getCheapestEvicteeWeight(const AllocationOrder &Order,
LiveInterval &VirtReg, SlotIndex Start,
SlotIndex End, float *BestEvictWeight);
void evictInterference(LiveInterval&, unsigned,
SmallVectorImpl<unsigned>&);
bool mayRecolorAllInterferences(unsigned PhysReg, LiveInterval &VirtReg,
@ -405,7 +478,8 @@ private:
unsigned calculateRegionSplitCost(LiveInterval &VirtReg,
AllocationOrder &Order,
BlockFrequency &BestCost,
unsigned &NumCands, bool IgnoreCSR);
unsigned &NumCands, bool IgnoreCSR,
bool *CanCauseEvictionChain = nullptr);
/// Perform region splitting.
unsigned doRegionSplit(LiveInterval &VirtReg, unsigned BestCand,
bool HasCompact,
@ -859,6 +933,92 @@ bool RAGreedy::canEvictInterference(LiveInterval &VirtReg, unsigned PhysReg,
return true;
}
/// \brief Return true if all interferences between VirtReg and PhysReg between
/// Start and End can be evicted.
///
/// \param VirtReg Live range that is about to be assigned.
/// \param PhysReg Desired register for assignment.
/// \param Start Start of range to look for interferences.
/// \param End End of range to look for interferences.
/// \param MaxCost Only look for cheaper candidates and update with new cost
/// when returning true.
/// \return True when interference can be evicted cheaper than MaxCost.
bool RAGreedy::canEvictInterferenceInRange(LiveInterval &VirtReg,
unsigned PhysReg, SlotIndex Start,
SlotIndex End,
EvictionCost &MaxCost) {
EvictionCost Cost;
for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, *Units);
// Check if any interfering live range is heavier than MaxWeight.
for (unsigned i = Q.interferingVRegs().size(); i; --i) {
LiveInterval *Intf = Q.interferingVRegs()[i - 1];
// Check if interference overlast the segment in interest.
if (!Intf->overlaps(Start, End))
continue;
// Cannot evict non virtual reg interference.
if (!TargetRegisterInfo::isVirtualRegister(Intf->reg))
return false;
// Never evict spill products. They cannot split or spill.
if (getStage(*Intf) == RS_Done)
return false;
// Would this break a satisfied hint?
bool BreaksHint = VRM->hasPreferredPhys(Intf->reg);
// Update eviction cost.
Cost.BrokenHints += BreaksHint;
Cost.MaxWeight = std::max(Cost.MaxWeight, Intf->weight);
// Abort if this would be too expensive.
if (!(Cost < MaxCost))
return false;
}
}
if (Cost.MaxWeight == 0)
return false;
MaxCost = Cost;
return true;
}
/// \brief Return tthe physical register that will be best
/// candidate for eviction by a local split interval that will be created
/// between Start and End.
///
/// \param Order The allocation order
/// \param VirtReg Live range that is about to be assigned.
/// \param Start Start of range to look for interferences
/// \param End End of range to look for interferences
/// \param BestEvictweight The eviction cost of that eviction
/// \return The PhysReg which is the best candidate for eviction and the
/// eviction cost in BestEvictweight
unsigned RAGreedy::getCheapestEvicteeWeight(const AllocationOrder &Order,
LiveInterval &VirtReg,
SlotIndex Start, SlotIndex End,
float *BestEvictweight) {
EvictionCost BestEvictCost;
BestEvictCost.setMax();
BestEvictCost.MaxWeight = VirtReg.weight;
unsigned BestEvicteePhys = 0;
// Go over all physical registers and find the best candidate for eviction
for (auto PhysReg : Order.getOrder()) {
if (!canEvictInterferenceInRange(VirtReg, PhysReg, Start, End,
BestEvictCost))
continue;
// Best so far.
BestEvicteePhys = PhysReg;
}
*BestEvictweight = BestEvictCost.MaxWeight;
return BestEvicteePhys;
}
/// evictInterference - Evict any interferring registers that prevent VirtReg
/// from being assigned to Physreg. This assumes that canEvictInterference
/// returned true.
@ -893,6 +1053,9 @@ void RAGreedy::evictInterference(LiveInterval &VirtReg, unsigned PhysReg,
// The same VirtReg may be present in multiple RegUnits. Skip duplicates.
if (!VRM->hasPhys(Intf->reg))
continue;
LastEvicted.addEviction(PhysReg, VirtReg.reg, Intf->reg);
Matrix->unassign(*Intf);
assert((ExtraRegInfo[Intf->reg].Cascade < Cascade ||
VirtReg.isSpillable() < Intf->isSpillable()) &&
@ -1214,13 +1377,117 @@ BlockFrequency RAGreedy::calcSpillCost() {
return Cost;
}
/// \brief Check if splitting Evictee will create a local split interval in
/// basic block number BBNumber that may cause a bad eviction chain. This is
/// intended to prevent bad eviction sequences like:
/// movl %ebp, 8(%esp) # 4-byte Spill
/// movl %ecx, %ebp
/// movl %ebx, %ecx
/// movl %edi, %ebx
/// movl %edx, %edi
/// cltd
/// idivl %esi
/// movl %edi, %edx
/// movl %ebx, %edi
/// movl %ecx, %ebx
/// movl %ebp, %ecx
/// movl 16(%esp), %ebp # 4 - byte Reload
///
/// Such sequences are created in 2 scenarios:
///
/// Scenario #1:
/// vreg0 is evicted from physreg0 by vreg1.
/// Evictee vreg0 is intended for region splitting with split candidate
/// physreg0 (the reg vreg0 was evicted from).
/// Region splitting creates a local interval because of interference with the
/// evictor vreg1 (normally region spliitting creates 2 interval, the "by reg"
/// and "by stack" intervals and local interval created when interference
/// occurs).
/// One of the split intervals ends up evicting vreg2 from physreg1.
/// Evictee vreg2 is intended for region splitting with split candidate
/// physreg1.
/// One of the split intervals ends up evicting vreg3 from physreg2, etc.
///
/// Scenario #2
/// vreg0 is evicted from physreg0 by vreg1.
/// vreg2 is evicted from physreg2 by vreg3 etc.
/// Evictee vreg0 is intended for region splitting with split candidate
/// physreg1.
/// Region splitting creates a local interval because of interference with the
/// evictor vreg1.
/// One of the split intervals ends up evicting back original evictor vreg1
/// from physreg0 (the reg vreg0 was evicted from).
/// Another evictee vreg2 is intended for region splitting with split candidate
/// physreg1.
/// One of the split intervals ends up evicting vreg3 from physreg2, etc.
///
/// \param Evictee The register considered to be split.
/// \param Cand The split candidate that determines the physical register
/// we are splitting for and the interferences.
/// \param BBNumber The number of a BB for which the region split process will
/// create a local split interval.
/// \param Order The phisical registers that may get evicted by a split
/// artifact of Evictee.
/// \return True if splitting Evictee may cause a bad eviction chain, false
/// otherwise.
bool RAGreedy::splitCanCauseEvictionChain(unsigned Evictee,
GlobalSplitCandidate &Cand,
unsigned BBNumber,
const AllocationOrder &Order) {
EvictionTrack::EvictorInfo VregEvictorInfo = LastEvicted.getEvictor(Evictee);
unsigned Evictor = VregEvictorInfo.first;
unsigned PhysReg = VregEvictorInfo.second;
// No actual evictor.
if (!Evictor || !PhysReg)
return false;
float MaxWeight = 0;
unsigned FutureEvictedPhysReg =
getCheapestEvicteeWeight(Order, LIS->getInterval(Evictee),
Cand.Intf.first(), Cand.Intf.last(), &MaxWeight);
// The bad eviction chain occurs when either the split candidate the the
// evited reg or one of the split artifact will evict the evicting reg.
if ((PhysReg != Cand.PhysReg) && (PhysReg != FutureEvictedPhysReg))
return false;
Cand.Intf.moveToBlock(BBNumber);
// Check to see if the Evictor contains interference (with Evictee) in the
// given BB. If so, this interference caused the eviction of Evictee from
// PhysReg. This suggest that we will create a local interval during the
// region split to avoid this interference This local interval may cause a bad
// eviction chain.
if (!LIS->hasInterval(Evictor))
return false;
LiveInterval &EvictorLI = LIS->getInterval(Evictor);
if (EvictorLI.FindSegmentContaining(Cand.Intf.first()) == EvictorLI.end())
return false;
// Now, check to see if the local interval we will create is going to be
// expensive enough to evict somebody If so, this may cause a bad eviction
// chain.
VirtRegAuxInfo VRAI(*MF, *LIS, VRM, getAnalysis<MachineLoopInfo>(), *MBFI);
float splitArtifactWeight =
VRAI.futureWeight(LIS->getInterval(Evictee),
Cand.Intf.first().getPrevIndex(), Cand.Intf.last());
if (splitArtifactWeight >= 0 && splitArtifactWeight < MaxWeight)
return false;
return true;
}
/// calcGlobalSplitCost - Return the global split cost of following the split
/// pattern in LiveBundles. This cost should be added to the local cost of the
/// interference pattern in SplitConstraints.
///
BlockFrequency RAGreedy::calcGlobalSplitCost(GlobalSplitCandidate &Cand) {
BlockFrequency RAGreedy::calcGlobalSplitCost(GlobalSplitCandidate &Cand,
const AllocationOrder &Order,
bool *CanCauseEvictionChain) {
BlockFrequency GlobalCost = 0;
const BitVector &LiveBundles = Cand.LiveBundles;
unsigned VirtRegToSplit = SA->getParent().reg;
ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks();
for (unsigned i = 0; i != UseBlocks.size(); ++i) {
const SplitAnalysis::BlockInfo &BI = UseBlocks[i];
@ -1229,6 +1496,24 @@ BlockFrequency RAGreedy::calcGlobalSplitCost(GlobalSplitCandidate &Cand) {
bool RegOut = LiveBundles[Bundles->getBundle(BC.Number, true)];
unsigned Ins = 0;
Cand.Intf.moveToBlock(BC.Number);
// Check wheather a local interval is going to be created during the region
// split.
if (EnableAdvancedRASplitCost && CanCauseEvictionChain &&
Cand.Intf.hasInterference() && BI.LiveIn && BI.LiveOut && RegIn &&
RegOut) {
if (splitCanCauseEvictionChain(VirtRegToSplit, Cand, BC.Number, Order)) {
// This interfernce cause our eviction from this assignment, we might
// evict somebody else, add that cost.
// See splitCanCauseEvictionChain for detailed description of scenarios.
GlobalCost += SpillPlacer->getBlockFrequency(BC.Number);
GlobalCost += SpillPlacer->getBlockFrequency(BC.Number);
*CanCauseEvictionChain = true;
}
}
if (BI.LiveIn)
Ins += RegIn != (BC.Entry == SpillPlacement::PrefReg);
if (BI.LiveOut)
@ -1249,6 +1534,20 @@ BlockFrequency RAGreedy::calcGlobalSplitCost(GlobalSplitCandidate &Cand) {
if (Cand.Intf.hasInterference()) {
GlobalCost += SpillPlacer->getBlockFrequency(Number);
GlobalCost += SpillPlacer->getBlockFrequency(Number);
// Check wheather a local interval is going to be created during the
// region split.
if (EnableAdvancedRASplitCost && CanCauseEvictionChain &&
splitCanCauseEvictionChain(VirtRegToSplit, Cand, Number, Order)) {
// This interfernce cause our eviction from this assignment, we might
// evict somebody else, add that cost.
// See splitCanCauseEvictionChain for detailed description of
// scenarios.
GlobalCost += SpillPlacer->getBlockFrequency(Number);
GlobalCost += SpillPlacer->getBlockFrequency(Number);
*CanCauseEvictionChain = true;
}
}
continue;
}
@ -1413,6 +1712,7 @@ void RAGreedy::splitAroundRegion(LiveRangeEdit &LREdit,
unsigned RAGreedy::tryRegionSplit(LiveInterval &VirtReg, AllocationOrder &Order,
SmallVectorImpl<unsigned> &NewVRegs) {
unsigned NumCands = 0;
BlockFrequency SpillCost = calcSpillCost();
BlockFrequency BestCost;
// Check if we can split this live range around a compact region.
@ -1424,14 +1724,24 @@ unsigned RAGreedy::tryRegionSplit(LiveInterval &VirtReg, AllocationOrder &Order,
} else {
// No benefit from the compact region, our fallback will be per-block
// splitting. Make sure we find a solution that is cheaper than spilling.
BestCost = calcSpillCost();
BestCost = SpillCost;
DEBUG(dbgs() << "Cost of isolating all blocks = ";
MBFI->printBlockFreq(dbgs(), BestCost) << '\n');
}
bool CanCauseEvictionChain = false;
unsigned BestCand =
calculateRegionSplitCost(VirtReg, Order, BestCost, NumCands,
false/*IgnoreCSR*/);
false /*IgnoreCSR*/, &CanCauseEvictionChain);
// Split candidates with compact regions can cause a bad eviction sequence.
// See splitCanCauseEvictionChain for detailed description of scenarios.
// To avoid it, we need to comapre the cost with the spill cost and not the
// current max frequency.
if (HasCompact && (BestCost > SpillCost) && (BestCand != NoCand) &&
CanCauseEvictionChain) {
return 0;
}
// No solutions found, fall back to single block splitting.
if (!HasCompact && BestCand == NoCand)
@ -1443,8 +1753,8 @@ unsigned RAGreedy::tryRegionSplit(LiveInterval &VirtReg, AllocationOrder &Order,
unsigned RAGreedy::calculateRegionSplitCost(LiveInterval &VirtReg,
AllocationOrder &Order,
BlockFrequency &BestCost,
unsigned &NumCands,
bool IgnoreCSR) {
unsigned &NumCands, bool IgnoreCSR,
bool *CanCauseEvictionChain) {
unsigned BestCand = NoCand;
Order.rewind();
while (unsigned PhysReg = Order.next()) {
@ -1504,7 +1814,8 @@ unsigned RAGreedy::calculateRegionSplitCost(LiveInterval &VirtReg,
continue;
}
Cost += calcGlobalSplitCost(Cand);
bool HasEvictionChain = false;
Cost += calcGlobalSplitCost(Cand, Order, &HasEvictionChain);
DEBUG({
dbgs() << ", total = "; MBFI->printBlockFreq(dbgs(), Cost)
<< " with bundles";
@ -1515,9 +1826,24 @@ unsigned RAGreedy::calculateRegionSplitCost(LiveInterval &VirtReg,
if (Cost < BestCost) {
BestCand = NumCands;
BestCost = Cost;
// See splitCanCauseEvictionChain for detailed description of bad
// eviction chain scenarios.
if (CanCauseEvictionChain)
*CanCauseEvictionChain = HasEvictionChain;
}
++NumCands;
}
if (CanCauseEvictionChain && BestCand != NoCand) {
// See splitCanCauseEvictionChain for detailed description of bad
// eviction chain scenarios.
DEBUG(dbgs() << "Best split candidate of vreg "
<< PrintReg(VirtReg.reg, TRI) << " may ");
if (!(*CanCauseEvictionChain))
DEBUG(dbgs() << "not ");
DEBUG(dbgs() << "cause bad eviction chain\n");
}
return BestCand;
}
@ -2580,6 +2906,8 @@ unsigned RAGreedy::selectOrSplitImpl(LiveInterval &VirtReg,
// First try assigning a free register.
AllocationOrder Order(VirtReg.reg, *VRM, RegClassInfo, Matrix);
if (unsigned PhysReg = tryAssign(VirtReg, Order, NewVRegs)) {
// If VirtReg got an assignment, the eviction info is no longre relevant.
LastEvicted.clearEvicteeInfo(VirtReg.reg);
// When NewVRegs is not empty, we may have made decisions such as evicting
// a virtual register, go with the earlier decisions and use the physical
// register.
@ -2613,6 +2941,9 @@ unsigned RAGreedy::selectOrSplitImpl(LiveInterval &VirtReg,
// copy-related live-ranges.
if (Hint && Hint != PhysReg)
SetOfBrokenHints.insert(&VirtReg);
// If VirtReg eviction someone, the eviction info for it as an evictee is
// no longre relevant.
LastEvicted.clearEvicteeInfo(VirtReg.reg);
return PhysReg;
}
@ -2632,8 +2963,11 @@ unsigned RAGreedy::selectOrSplitImpl(LiveInterval &VirtReg,
// Try splitting VirtReg or interferences.
unsigned NewVRegSizeBefore = NewVRegs.size();
unsigned PhysReg = trySplit(VirtReg, Order, NewVRegs);
if (PhysReg || (NewVRegs.size() - NewVRegSizeBefore))
if (PhysReg || (NewVRegs.size() - NewVRegSizeBefore)) {
// If VirtReg got split, the eviction info is no longre relevant.
LastEvicted.clearEvicteeInfo(VirtReg.reg);
return PhysReg;
}
}
// If we couldn't allocate a register from spilling, there is probably some
@ -2747,6 +3081,9 @@ bool RAGreedy::runOnMachineFunction(MachineFunction &mf) {
MF->getSubtarget().enableRALocalReassignment(
MF->getTarget().getOptLevel());
EnableAdvancedRASplitCost = ConsiderLocalIntervalCost ||
MF->getSubtarget().enableAdvancedRASplitCost();
if (VerifyEnabled)
MF->verify(this, "Before greedy register allocator");
@ -2778,6 +3115,7 @@ bool RAGreedy::runOnMachineFunction(MachineFunction &mf) {
IntfCache.init(MF, Matrix->getLiveUnions(), Indexes, LIS, TRI);
GlobalCand.resize(32); // This will grow as needed.
SetOfBrokenHints.clear();
LastEvicted.clear();
allocatePhysRegs();
tryHintsRecoloring();

View File

@ -51,6 +51,10 @@ bool TargetSubtargetInfo::enableRALocalReassignment(
return true;
}
bool TargetSubtargetInfo::enableAdvancedRASplitCost() const {
return false;
}
bool TargetSubtargetInfo::enablePostRAScheduler() const {
return getSchedModel().PostRAScheduler;
}

View File

@ -672,6 +672,10 @@ public:
AntiDepBreakMode getAntiDepBreakMode() const override {
return TargetSubtargetInfo::ANTIDEP_CRITICAL;
}
virtual bool enableAdvancedRASplitCost() const {
return true;
}
};
} // end namespace llvm

View File

@ -0,0 +1,312 @@
; RUN: llc < %s -march=x86 -regalloc=greedy -stop-after=greedy | FileCheck %s
; Make sure bad eviction sequence doesnt occur
; Fix for bugzilla 26810.
; This test is meant to make sure bad eviction sequence like the one described
; below does not occur
;
; movapd %xmm7, 160(%esp) # 16-byte Spill
; movapd %xmm5, %xmm7
; movapd %xmm4, %xmm5
; movapd %xmm3, %xmm4
; movapd %xmm2, %xmm3
; some_inst
; movapd %xmm3, %xmm2
; movapd %xmm4, %xmm3
; movapd %xmm5, %xmm4
; movapd %xmm7, %xmm5
; movapd 160(%esp), %xmm7 # 16-byte Reload
; Make sure we have no redundant copies in the problematic code section
; CHECK-LABEL: name: loop
; CHECK: bb.2.for.body:
; CHECK: SUBPDrr
; CHECK-NEXT: MOVAPSmr
; CHECK-NEXT: MOVAPSrm
; CHECK-NEXT: MULPDrm
; CHECK-NEXT: ADDPDrr
; CHECK-NEXT: ADD32ri8
target datalayout = "e-m:x-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32"
target triple = "i386-pc-linux-gnu"
%struct._iobuf = type { i8* }
$"\01??_C@_01NOFIACDB@w?$AA@" = comdat any
$"\01??_C@_09LAIDGMDM@?1dev?1null?$AA@" = comdat any
@"\01?v@@3PAU__m128d@@A" = global [8 x <2 x double>] zeroinitializer, align 16
@"\01?m1@@3PAU__m128d@@A" = local_unnamed_addr global [76800000 x <2 x double>] zeroinitializer, align 16
@"\01?m2@@3PAU__m128d@@A" = local_unnamed_addr global [8 x <2 x double>] zeroinitializer, align 16
@"\01??_C@_01NOFIACDB@w?$AA@" = linkonce_odr unnamed_addr constant [2 x i8] c"w\00", comdat, align 1
@"\01??_C@_09LAIDGMDM@?1dev?1null?$AA@" = linkonce_odr unnamed_addr constant [10 x i8] c"/dev/null\00", comdat, align 1
; Function Attrs: norecurse
define i32 @main() local_unnamed_addr #0 {
entry:
tail call void @init()
%0 = load <2 x double>, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?m2@@3PAU__m128d@@A", i32 0, i32 0), align 16, !tbaa !8
%1 = load <2 x double>, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?m2@@3PAU__m128d@@A", i32 0, i32 1), align 16, !tbaa !8
%2 = load <2 x double>, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?m2@@3PAU__m128d@@A", i32 0, i32 2), align 16, !tbaa !8
%3 = load <2 x double>, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?m2@@3PAU__m128d@@A", i32 0, i32 3), align 16, !tbaa !8
%4 = load <2 x double>, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?m2@@3PAU__m128d@@A", i32 0, i32 4), align 16, !tbaa !8
%5 = load <2 x double>, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?m2@@3PAU__m128d@@A", i32 0, i32 5), align 16, !tbaa !8
%6 = load <2 x double>, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?m2@@3PAU__m128d@@A", i32 0, i32 6), align 16, !tbaa !8
%7 = load <2 x double>, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?m2@@3PAU__m128d@@A", i32 0, i32 7), align 16, !tbaa !8
%.promoted.i = load <2 x double>, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?v@@3PAU__m128d@@A", i32 0, i32 0), align 16, !tbaa !8
%.promoted51.i = load <2 x double>, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?v@@3PAU__m128d@@A", i32 0, i32 1), align 16, !tbaa !8
%.promoted53.i = load <2 x double>, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?v@@3PAU__m128d@@A", i32 0, i32 2), align 16, !tbaa !8
%.promoted55.i = load <2 x double>, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?v@@3PAU__m128d@@A", i32 0, i32 3), align 16, !tbaa !8
%.promoted57.i = load <2 x double>, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?v@@3PAU__m128d@@A", i32 0, i32 4), align 16, !tbaa !8
%.promoted59.i = load <2 x double>, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?v@@3PAU__m128d@@A", i32 0, i32 5), align 16, !tbaa !8
%.promoted61.i = load <2 x double>, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?v@@3PAU__m128d@@A", i32 0, i32 6), align 16, !tbaa !8
%.promoted63.i = load <2 x double>, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?v@@3PAU__m128d@@A", i32 0, i32 7), align 16, !tbaa !8
br label %for.body.i
for.body.i: ; preds = %for.body.i, %entry
%add.i64.i = phi <2 x double> [ %.promoted63.i, %entry ], [ %add.i.i, %for.body.i ]
%add.i3662.i = phi <2 x double> [ %.promoted61.i, %entry ], [ %add.i36.i, %for.body.i ]
%add.i3860.i = phi <2 x double> [ %.promoted59.i, %entry ], [ %add.i38.i, %for.body.i ]
%add.i4058.i = phi <2 x double> [ %.promoted57.i, %entry ], [ %add.i40.i, %for.body.i ]
%add.i4256.i = phi <2 x double> [ %.promoted55.i, %entry ], [ %add.i42.i, %for.body.i ]
%add.i4454.i = phi <2 x double> [ %.promoted53.i, %entry ], [ %add.i44.i, %for.body.i ]
%add.i4652.i = phi <2 x double> [ %.promoted51.i, %entry ], [ %add.i46.i, %for.body.i ]
%add.i4850.i = phi <2 x double> [ %.promoted.i, %entry ], [ %add.i48.i, %for.body.i ]
%i.049.i = phi i32 [ 0, %entry ], [ %inc.i, %for.body.i ]
%arrayidx.i = getelementptr inbounds [76800000 x <2 x double>], [76800000 x <2 x double>]* @"\01?m1@@3PAU__m128d@@A", i32 0, i32 %i.049.i
%8 = load <2 x double>, <2 x double>* %arrayidx.i, align 16, !tbaa !8
%mul.i.i = fmul <2 x double> %0, %8
%add.i48.i = fadd <2 x double> %add.i4850.i, %mul.i.i
%mul.i47.i = fmul <2 x double> %1, %8
%add.i46.i = fadd <2 x double> %add.i4652.i, %mul.i47.i
%mul.i45.i = fmul <2 x double> %2, %8
%add.i44.i = fadd <2 x double> %add.i4454.i, %mul.i45.i
%mul.i43.i = fmul <2 x double> %3, %8
%add.i42.i = fadd <2 x double> %add.i4256.i, %mul.i43.i
%mul.i41.i = fmul <2 x double> %4, %8
%add.i40.i = fadd <2 x double> %add.i4058.i, %mul.i41.i
%mul.i39.i = fmul <2 x double> %5, %8
%add.i38.i = fadd <2 x double> %add.i3860.i, %mul.i39.i
%mul.i37.i = fmul <2 x double> %6, %8
%add.i36.i = fsub <2 x double> %add.i3662.i, %mul.i37.i
%mul.i35.i = fmul <2 x double> %7, %8
%add.i.i = fadd <2 x double> %add.i64.i, %mul.i35.i
%inc.i = add nuw nsw i32 %i.049.i, 1
%exitcond.i = icmp eq i32 %inc.i, 76800000
br i1 %exitcond.i, label %loop.exit, label %for.body.i
loop.exit: ; preds = %for.body.i
store <2 x double> %add.i48.i, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?v@@3PAU__m128d@@A", i32 0, i32 0), align 16, !tbaa !8
store <2 x double> %add.i46.i, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?v@@3PAU__m128d@@A", i32 0, i32 1), align 16, !tbaa !8
store <2 x double> %add.i46.i, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?v@@3PAU__m128d@@A", i32 0, i32 1), align 16, !tbaa !8
store <2 x double> %add.i44.i, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?v@@3PAU__m128d@@A", i32 0, i32 2), align 16, !tbaa !8
store <2 x double> %add.i42.i, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?v@@3PAU__m128d@@A", i32 0, i32 3), align 16, !tbaa !8
store <2 x double> %add.i40.i, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?v@@3PAU__m128d@@A", i32 0, i32 4), align 16, !tbaa !8
store <2 x double> %add.i38.i, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?v@@3PAU__m128d@@A", i32 0, i32 5), align 16, !tbaa !8
store <2 x double> %add.i36.i, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?v@@3PAU__m128d@@A", i32 0, i32 6), align 16, !tbaa !8
store <2 x double> %add.i.i, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?v@@3PAU__m128d@@A", i32 0, i32 7), align 16, !tbaa !8
%call.i = tail call %struct._iobuf* @fopen(i8* getelementptr inbounds ([10 x i8], [10 x i8]* @"\01??_C@_09LAIDGMDM@?1dev?1null?$AA@", i32 0, i32 0), i8* getelementptr inbounds ([2 x i8], [2 x i8]* @"\01??_C@_01NOFIACDB@w?$AA@", i32 0, i32 0)) #7
%call1.i = tail call i32 @fwrite(i8* bitcast ([8 x <2 x double>]* @"\01?v@@3PAU__m128d@@A" to i8*), i32 16, i32 8, %struct._iobuf* %call.i) #7
%call2.i = tail call i32 @fclose(%struct._iobuf* %call.i) #7
ret i32 0
}
define void @init() local_unnamed_addr #1 {
entry:
call void @llvm.memset.p0i8.i32(i8* bitcast ([8 x <2 x double>]* @"\01?v@@3PAU__m128d@@A" to i8*), i8 0, i32 128, i32 16, i1 false)
%call.i = tail call i64 @_time64(i64* null)
%conv = trunc i64 %call.i to i32
tail call void @srand(i32 %conv)
br label %for.body6
for.body6: ; preds = %for.body6, %entry
%i2.051 = phi i32 [ 0, %entry ], [ %inc14, %for.body6 ]
%call7 = tail call i32 @rand()
%conv8 = sitofp i32 %call7 to double
%tmp.sroa.0.0.vec.insert = insertelement <2 x double> undef, double %conv8, i32 0
%call9 = tail call i32 @rand()
%conv10 = sitofp i32 %call9 to double
%tmp.sroa.0.8.vec.insert = insertelement <2 x double> %tmp.sroa.0.0.vec.insert, double %conv10, i32 1
%arrayidx12 = getelementptr inbounds [76800000 x <2 x double>], [76800000 x <2 x double>]* @"\01?m1@@3PAU__m128d@@A", i32 0, i32 %i2.051
store <2 x double> %tmp.sroa.0.8.vec.insert, <2 x double>* %arrayidx12, align 16, !tbaa !8
%inc14 = add nuw nsw i32 %i2.051, 1
%exitcond = icmp eq i32 %inc14, 76800000
br i1 %exitcond, label %for.body21.preheader, label %for.body6
for.body21.preheader: ; preds = %for.body6
%call25 = tail call i32 @rand()
%conv26 = sitofp i32 %call25 to double
%tmp23.sroa.0.0.vec.insert = insertelement <2 x double> undef, double %conv26, i32 0
%call28 = tail call i32 @rand()
%conv29 = sitofp i32 %call28 to double
%tmp23.sroa.0.8.vec.insert = insertelement <2 x double> %tmp23.sroa.0.0.vec.insert, double %conv29, i32 1
store <2 x double> %tmp23.sroa.0.8.vec.insert, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?m2@@3PAU__m128d@@A", i32 0, i32 0), align 16, !tbaa !8
%call25.1 = tail call i32 @rand()
%conv26.1 = sitofp i32 %call25.1 to double
%tmp23.sroa.0.0.vec.insert.1 = insertelement <2 x double> undef, double %conv26.1, i32 0
%call28.1 = tail call i32 @rand()
%conv29.1 = sitofp i32 %call28.1 to double
%tmp23.sroa.0.8.vec.insert.1 = insertelement <2 x double> %tmp23.sroa.0.0.vec.insert.1, double %conv29.1, i32 1
store <2 x double> %tmp23.sroa.0.8.vec.insert.1, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?m2@@3PAU__m128d@@A", i32 0, i32 1), align 16, !tbaa !8
%call25.2 = tail call i32 @rand()
%conv26.2 = sitofp i32 %call25.2 to double
%tmp23.sroa.0.0.vec.insert.2 = insertelement <2 x double> undef, double %conv26.2, i32 0
%call28.2 = tail call i32 @rand()
%conv29.2 = sitofp i32 %call28.2 to double
%tmp23.sroa.0.8.vec.insert.2 = insertelement <2 x double> %tmp23.sroa.0.0.vec.insert.2, double %conv29.2, i32 1
store <2 x double> %tmp23.sroa.0.8.vec.insert.2, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?m2@@3PAU__m128d@@A", i32 0, i32 2), align 16, !tbaa !8
%call25.3 = tail call i32 @rand()
%conv26.3 = sitofp i32 %call25.3 to double
%tmp23.sroa.0.0.vec.insert.3 = insertelement <2 x double> undef, double %conv26.3, i32 0
%call28.3 = tail call i32 @rand()
%conv29.3 = sitofp i32 %call28.3 to double
%tmp23.sroa.0.8.vec.insert.3 = insertelement <2 x double> %tmp23.sroa.0.0.vec.insert.3, double %conv29.3, i32 1
store <2 x double> %tmp23.sroa.0.8.vec.insert.3, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?m2@@3PAU__m128d@@A", i32 0, i32 3), align 16, !tbaa !8
%call25.4 = tail call i32 @rand()
%conv26.4 = sitofp i32 %call25.4 to double
%tmp23.sroa.0.0.vec.insert.4 = insertelement <2 x double> undef, double %conv26.4, i32 0
%call28.4 = tail call i32 @rand()
%conv29.4 = sitofp i32 %call28.4 to double
%tmp23.sroa.0.8.vec.insert.4 = insertelement <2 x double> %tmp23.sroa.0.0.vec.insert.4, double %conv29.4, i32 1
store <2 x double> %tmp23.sroa.0.8.vec.insert.4, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?m2@@3PAU__m128d@@A", i32 0, i32 4), align 16, !tbaa !8
%call25.5 = tail call i32 @rand()
%conv26.5 = sitofp i32 %call25.5 to double
%tmp23.sroa.0.0.vec.insert.5 = insertelement <2 x double> undef, double %conv26.5, i32 0
%call28.5 = tail call i32 @rand()
%conv29.5 = sitofp i32 %call28.5 to double
%tmp23.sroa.0.8.vec.insert.5 = insertelement <2 x double> %tmp23.sroa.0.0.vec.insert.5, double %conv29.5, i32 1
store <2 x double> %tmp23.sroa.0.8.vec.insert.5, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?m2@@3PAU__m128d@@A", i32 0, i32 5), align 16, !tbaa !8
%call25.6 = tail call i32 @rand()
%conv26.6 = sitofp i32 %call25.6 to double
%tmp23.sroa.0.0.vec.insert.6 = insertelement <2 x double> undef, double %conv26.6, i32 0
%call28.6 = tail call i32 @rand()
%conv29.6 = sitofp i32 %call28.6 to double
%tmp23.sroa.0.8.vec.insert.6 = insertelement <2 x double> %tmp23.sroa.0.0.vec.insert.6, double %conv29.6, i32 1
store <2 x double> %tmp23.sroa.0.8.vec.insert.6, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?m2@@3PAU__m128d@@A", i32 0, i32 6), align 16, !tbaa !8
%call25.7 = tail call i32 @rand()
%conv26.7 = sitofp i32 %call25.7 to double
%tmp23.sroa.0.0.vec.insert.7 = insertelement <2 x double> undef, double %conv26.7, i32 0
%call28.7 = tail call i32 @rand()
%conv29.7 = sitofp i32 %call28.7 to double
%tmp23.sroa.0.8.vec.insert.7 = insertelement <2 x double> %tmp23.sroa.0.0.vec.insert.7, double %conv29.7, i32 1
store <2 x double> %tmp23.sroa.0.8.vec.insert.7, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?m2@@3PAU__m128d@@A", i32 0, i32 7), align 16, !tbaa !8
ret void
}
; Function Attrs: norecurse nounwind
define void @loop() local_unnamed_addr #2 {
entry:
%0 = load <2 x double>, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?m2@@3PAU__m128d@@A", i32 0, i32 0), align 16, !tbaa !8
%1 = load <2 x double>, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?m2@@3PAU__m128d@@A", i32 0, i32 1), align 16, !tbaa !8
%2 = load <2 x double>, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?m2@@3PAU__m128d@@A", i32 0, i32 2), align 16, !tbaa !8
%3 = load <2 x double>, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?m2@@3PAU__m128d@@A", i32 0, i32 3), align 16, !tbaa !8
%4 = load <2 x double>, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?m2@@3PAU__m128d@@A", i32 0, i32 4), align 16, !tbaa !8
%5 = load <2 x double>, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?m2@@3PAU__m128d@@A", i32 0, i32 5), align 16, !tbaa !8
%6 = load <2 x double>, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?m2@@3PAU__m128d@@A", i32 0, i32 6), align 16, !tbaa !8
%7 = load <2 x double>, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?m2@@3PAU__m128d@@A", i32 0, i32 7), align 16, !tbaa !8
%.promoted = load <2 x double>, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?v@@3PAU__m128d@@A", i32 0, i32 0), align 16, !tbaa !8
%.promoted51 = load <2 x double>, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?v@@3PAU__m128d@@A", i32 0, i32 1), align 16, !tbaa !8
%.promoted53 = load <2 x double>, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?v@@3PAU__m128d@@A", i32 0, i32 2), align 16, !tbaa !8
%.promoted55 = load <2 x double>, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?v@@3PAU__m128d@@A", i32 0, i32 3), align 16, !tbaa !8
%.promoted57 = load <2 x double>, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?v@@3PAU__m128d@@A", i32 0, i32 4), align 16, !tbaa !8
%.promoted59 = load <2 x double>, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?v@@3PAU__m128d@@A", i32 0, i32 5), align 16, !tbaa !8
%.promoted61 = load <2 x double>, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?v@@3PAU__m128d@@A", i32 0, i32 6), align 16, !tbaa !8
%.promoted63 = load <2 x double>, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?v@@3PAU__m128d@@A", i32 0, i32 7), align 16, !tbaa !8
br label %for.body
for.cond.cleanup: ; preds = %for.body
store <2 x double> %add.i48, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?v@@3PAU__m128d@@A", i32 0, i32 0), align 16, !tbaa !8
store <2 x double> %add.i46, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?v@@3PAU__m128d@@A", i32 0, i32 1), align 16, !tbaa !8
store <2 x double> %add.i44, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?v@@3PAU__m128d@@A", i32 0, i32 2), align 16, !tbaa !8
store <2 x double> %add.i42, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?v@@3PAU__m128d@@A", i32 0, i32 3), align 16, !tbaa !8
store <2 x double> %add.i40, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?v@@3PAU__m128d@@A", i32 0, i32 4), align 16, !tbaa !8
store <2 x double> %add.i38, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?v@@3PAU__m128d@@A", i32 0, i32 5), align 16, !tbaa !8
store <2 x double> %add.i36, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?v@@3PAU__m128d@@A", i32 0, i32 6), align 16, !tbaa !8
store <2 x double> %add.i, <2 x double>* getelementptr inbounds ([8 x <2 x double>], [8 x <2 x double>]* @"\01?v@@3PAU__m128d@@A", i32 0, i32 7), align 16, !tbaa !8
ret void
for.body: ; preds = %for.body, %entry
%add.i64 = phi <2 x double> [ %.promoted63, %entry ], [ %add.i, %for.body ]
%add.i3662 = phi <2 x double> [ %.promoted61, %entry ], [ %add.i36, %for.body ]
%add.i3860 = phi <2 x double> [ %.promoted59, %entry ], [ %add.i38, %for.body ]
%add.i4058 = phi <2 x double> [ %.promoted57, %entry ], [ %add.i40, %for.body ]
%add.i4256 = phi <2 x double> [ %.promoted55, %entry ], [ %add.i42, %for.body ]
%add.i4454 = phi <2 x double> [ %.promoted53, %entry ], [ %add.i44, %for.body ]
%add.i4652 = phi <2 x double> [ %.promoted51, %entry ], [ %add.i46, %for.body ]
%add.i4850 = phi <2 x double> [ %.promoted, %entry ], [ %add.i48, %for.body ]
%i.049 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
%arrayidx = getelementptr inbounds [76800000 x <2 x double>], [76800000 x <2 x double>]* @"\01?m1@@3PAU__m128d@@A", i32 0, i32 %i.049
%8 = load <2 x double>, <2 x double>* %arrayidx, align 16, !tbaa !8
%mul.i = fmul <2 x double> %8, %0
%add.i48 = fadd <2 x double> %add.i4850, %mul.i
%mul.i47 = fmul <2 x double> %8, %1
%add.i46 = fadd <2 x double> %add.i4652, %mul.i47
%mul.i45 = fmul <2 x double> %8, %2
%add.i44 = fadd <2 x double> %add.i4454, %mul.i45
%mul.i43 = fmul <2 x double> %8, %3
%add.i42 = fadd <2 x double> %add.i4256, %mul.i43
%mul.i41 = fmul <2 x double> %8, %4
%add.i40 = fadd <2 x double> %add.i4058, %mul.i41
%mul.i39 = fmul <2 x double> %8, %5
%add.i38 = fadd <2 x double> %add.i3860, %mul.i39
%mul.i37 = fmul <2 x double> %8, %6
%add.i36 = fsub <2 x double> %add.i3662, %mul.i37
%mul.i35 = fmul <2 x double> %8, %7
%add.i = fadd <2 x double> %add.i64, %mul.i35
%inc = add nuw nsw i32 %i.049, 1
%exitcond = icmp eq i32 %inc, 76800000
br i1 %exitcond, label %for.cond.cleanup, label %for.body
}
; Function Attrs: nounwind
define void @"\01?dump@@YAXXZ"() local_unnamed_addr #3 {
entry:
%call = tail call %struct._iobuf* @fopen(i8* getelementptr inbounds ([10 x i8], [10 x i8]* @"\01??_C@_09LAIDGMDM@?1dev?1null?$AA@", i32 0, i32 0), i8* getelementptr inbounds ([2 x i8], [2 x i8]* @"\01??_C@_01NOFIACDB@w?$AA@", i32 0, i32 0))
%call1 = tail call i32 @fwrite(i8* bitcast ([8 x <2 x double>]* @"\01?v@@3PAU__m128d@@A" to i8*), i32 16, i32 8, %struct._iobuf* %call)
%call2 = tail call i32 @fclose(%struct._iobuf* %call)
ret void
}
declare void @srand(i32) local_unnamed_addr #4
declare i32 @rand() local_unnamed_addr #4
; Function Attrs: nounwind
declare noalias %struct._iobuf* @fopen(i8* nocapture readonly, i8* nocapture readonly) local_unnamed_addr #5
; Function Attrs: nounwind
declare i32 @fwrite(i8* nocapture, i32, i32, %struct._iobuf* nocapture) local_unnamed_addr #5
; Function Attrs: nounwind
declare i32 @fclose(%struct._iobuf* nocapture) local_unnamed_addr #5
declare i64 @_time64(i64*) local_unnamed_addr #4
; Function Attrs: argmemonly nounwind
declare void @llvm.memset.p0i8.i32(i8* nocapture writeonly, i8, i32, i32, i1) #6
attributes #0 = { norecurse "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #1 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #2 = { norecurse nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #3 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #4 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #5 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #6 = { argmemonly nounwind }
attributes #7 = { nounwind }
!llvm.linker.options = !{!0, !1, !2, !3, !4}
!llvm.module.flags = !{!5, !6}
!llvm.ident = !{!7}
!0 = !{!"/FAILIFMISMATCH:\22_MSC_VER=1900\22"}
!1 = !{!"/FAILIFMISMATCH:\22_ITERATOR_DEBUG_LEVEL=0\22"}
!2 = !{!"/FAILIFMISMATCH:\22RuntimeLibrary=MT_StaticRelease\22"}
!3 = !{!"/DEFAULTLIB:libcpmt.lib"}
!4 = !{!"/FAILIFMISMATCH:\22_CRT_STDIO_ISO_WIDE_SPECIFIERS=0\22"}
!5 = !{i32 1, !"NumRegisterParameters", i32 0}
!6 = !{i32 1, !"wchar_size", i32 2}
!7 = !{!"clang version 5.0.0 (cfe/trunk 305640)"}
!8 = !{!9, !9, i64 0}
!9 = !{!"omnipotent char", !10, i64 0}
!10 = !{!"Simple C++ TBAA"}

View File

@ -0,0 +1,116 @@
; RUN: llc < %s -march=x86 -regalloc=greedy -stop-after=greedy | FileCheck %s
; Make sure bad eviction sequence doesnt occur
; Part of the fix for bugzilla 26810.
; This test is meant to make sure bad eviction sequence like the one described
; below does not occur
;
; movl %ebp, 8(%esp) # 4-byte Spill
; movl %ecx, %ebp
; movl %ebx, %ecx
; movl %edi, %ebx
; movl %edx, %edi
; cltd
; idivl %esi
; movl %edi, %edx
; movl %ebx, %edi
; movl %ecx, %ebx
; movl %ebp, %ecx
; movl 16(%esp), %ebp # 4 - byte Reload
; Make sure we have no redundant copies in the problematic code seqtion
; CHECK-LABEL: name: bar
; CHECK: bb.3.for.body:
; CHECK: %eax = COPY
; CHECK-NEXT: CDQ
; CHECK-NEXT: IDIV32r
; CHECK-NEXT: ADD32rr
target datalayout = "e-m:x-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32"
target triple = "i386-pc-linux-gnu"
; Function Attrs: norecurse nounwind readonly
define i32 @bar(i32 %size, i32* nocapture readonly %arr, i32* nocapture readnone %tmp) local_unnamed_addr #1 {
entry:
%0 = load i32, i32* %arr, align 4, !tbaa !3
%arrayidx3 = getelementptr inbounds i32, i32* %arr, i32 1
%1 = load i32, i32* %arrayidx3, align 4, !tbaa !3
%arrayidx5 = getelementptr inbounds i32, i32* %arr, i32 2
%2 = load i32, i32* %arrayidx5, align 4, !tbaa !3
%arrayidx7 = getelementptr inbounds i32, i32* %arr, i32 3
%3 = load i32, i32* %arrayidx7, align 4, !tbaa !3
%arrayidx9 = getelementptr inbounds i32, i32* %arr, i32 4
%4 = load i32, i32* %arrayidx9, align 4, !tbaa !3
%arrayidx11 = getelementptr inbounds i32, i32* %arr, i32 5
%5 = load i32, i32* %arrayidx11, align 4, !tbaa !3
%arrayidx13 = getelementptr inbounds i32, i32* %arr, i32 6
%6 = load i32, i32* %arrayidx13, align 4, !tbaa !3
%arrayidx15 = getelementptr inbounds i32, i32* %arr, i32 7
%7 = load i32, i32* %arrayidx15, align 4, !tbaa !3
%arrayidx17 = getelementptr inbounds i32, i32* %arr, i32 8
%8 = load i32, i32* %arrayidx17, align 4, !tbaa !3
%cmp69 = icmp sgt i32 %size, 1
br i1 %cmp69, label %for.body, label %for.cond.cleanup
for.cond.cleanup: ; preds = %for.body, %entry
%x0.0.lcssa = phi i32 [ %0, %entry ], [ %add, %for.body ]
%x1.0.lcssa = phi i32 [ %1, %entry ], [ %sub, %for.body ]
%x2.0.lcssa = phi i32 [ %2, %entry ], [ %mul, %for.body ]
%x3.0.lcssa = phi i32 [ %3, %entry ], [ %div, %for.body ]
%x4.0.lcssa = phi i32 [ %4, %entry ], [ %add19, %for.body ]
%x5.0.lcssa = phi i32 [ %5, %entry ], [ %sub20, %for.body ]
%x6.0.lcssa = phi i32 [ %6, %entry ], [ %add21, %for.body ]
%x7.0.lcssa = phi i32 [ %7, %entry ], [ %mul22, %for.body ]
%x8.0.lcssa = phi i32 [ %8, %entry ], [ %sub23, %for.body ]
%mul24 = mul nsw i32 %x1.0.lcssa, %x0.0.lcssa
%mul25 = mul nsw i32 %mul24, %x2.0.lcssa
%mul26 = mul nsw i32 %mul25, %x3.0.lcssa
%mul27 = mul nsw i32 %mul26, %x4.0.lcssa
%mul28 = mul nsw i32 %mul27, %x5.0.lcssa
%mul29 = mul nsw i32 %mul28, %x6.0.lcssa
%mul30 = mul nsw i32 %mul29, %x7.0.lcssa
%mul31 = mul nsw i32 %mul30, %x8.0.lcssa
ret i32 %mul31
for.body: ; preds = %entry, %for.body
%i.079 = phi i32 [ %inc, %for.body ], [ 1, %entry ]
%x8.078 = phi i32 [ %sub23, %for.body ], [ %8, %entry ]
%x7.077 = phi i32 [ %mul22, %for.body ], [ %7, %entry ]
%x6.076 = phi i32 [ %add21, %for.body ], [ %6, %entry ]
%x5.075 = phi i32 [ %sub20, %for.body ], [ %5, %entry ]
%x4.074 = phi i32 [ %add19, %for.body ], [ %4, %entry ]
%x3.073 = phi i32 [ %div, %for.body ], [ %3, %entry ]
%x2.072 = phi i32 [ %mul, %for.body ], [ %2, %entry ]
%x1.071 = phi i32 [ %sub, %for.body ], [ %1, %entry ]
%x0.070 = phi i32 [ %add, %for.body ], [ %0, %entry ]
%add = add nsw i32 %x1.071, %x0.070
%sub = sub nsw i32 %x1.071, %x2.072
%mul = mul nsw i32 %x3.073, %x2.072
%div = sdiv i32 %x3.073, %x4.074
%add19 = add nsw i32 %x5.075, %x4.074
%sub20 = sub nsw i32 %x5.075, %x6.076
%add21 = add nsw i32 %x7.077, %x6.076
%mul22 = mul nsw i32 %x8.078, %x7.077
%sub23 = sub nsw i32 %x8.078, %add
%inc = add nuw nsw i32 %i.079, 1
%exitcond = icmp eq i32 %inc, %size
br i1 %exitcond, label %for.cond.cleanup, label %for.body, !llvm.loop !7
}
attributes #0 = { norecurse nounwind readnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-features"="+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #1 = { norecurse nounwind readonly "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-features"="+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
!llvm.module.flags = !{!0, !1}
!llvm.ident = !{!2}
!0 = !{i32 1, !"NumRegisterParameters", i32 0}
!1 = !{i32 1, !"wchar_size", i32 2}
!2 = !{!"clang version 5.0.0 (cfe/trunk 305640)"}
!3 = !{!4, !4, i64 0}
!4 = !{!"int", !5, i64 0}
!5 = !{!"omnipotent char", !6, i64 0}
!6 = !{!"Simple C/C++ TBAA"}
!7 = distinct !{!7, !8}
!8 = !{!"llvm.loop.unroll.disable"}