[regalloc] Ensure Query::collectInterferingVRegs is called before interval iteration

The main part of the patch is the change in RegAllocGreedy.cpp: Q.collectInterferingVRegs()
needs to be called before iterating over the interfering live ranges.

The rest of the patch helps enforce that requirement: instead of clearing the query's
InterferingVRegs field, we invalidate it (reset it to None). The clearing happens when the
live reg matrix is invalidated (existing triggering mechanism).

Without the change in RegAllocGreedy.cpp, the compiler ICEs (hits an internal compiler error).

This patch should make it easier for developers to discover that
collectInterferingVRegs needs to be called before iterating.

I will follow up with a subsequent patch to improve the usability and maintainability of Query.

Differential Revision: https://reviews.llvm.org/D98232
This commit is contained in:
Mircea Trofin 2021-03-08 20:55:53 -08:00
parent 908a267b5a
commit ce61def529
12 changed files with 119 additions and 103 deletions

View File

@ -114,30 +114,30 @@ public:
const LiveRange *LR = nullptr;
LiveRange::const_iterator LRI; ///< current position in LR
ConstSegmentIter LiveUnionI; ///< current position in LiveUnion
SmallVector<LiveInterval*,4> InterferingVRegs;
Optional<SmallVector<LiveInterval *, 4>> InterferingVRegs;
bool CheckedFirstInterference = false;
bool SeenAllInterferences = false;
unsigned Tag = 0;
unsigned UserTag = 0;
public:
Query() = default;
Query(const LiveRange &LR, const LiveIntervalUnion &LIU)
: LiveUnion(&LIU), LR(&LR) {}
Query(const Query &) = delete;
Query &operator=(const Query &) = delete;
void reset(unsigned NewUserTag, const LiveRange &NewLR,
const LiveIntervalUnion &NewLiveUnion) {
LiveUnion = &NewLiveUnion;
LR = &NewLR;
InterferingVRegs.clear();
InterferingVRegs = None;
CheckedFirstInterference = false;
SeenAllInterferences = false;
Tag = NewLiveUnion.getTag();
UserTag = NewUserTag;
}
public:
Query() = default;
Query(const LiveRange &LR, const LiveIntervalUnion &LIU):
LiveUnion(&LIU), LR(&LR) {}
Query(const Query &) = delete;
Query &operator=(const Query &) = delete;
void init(unsigned NewUserTag, const LiveRange &NewLR,
const LiveIntervalUnion &NewLiveUnion) {
if (UserTag == NewUserTag && LR == &NewLR && LiveUnion == &NewLiveUnion &&
@ -164,7 +164,7 @@ public:
// Vector generated by collectInterferingVRegs.
const SmallVectorImpl<LiveInterval*> &interferingVRegs() const {
return InterferingVRegs;
return *InterferingVRegs;
}
};

View File

@ -112,7 +112,7 @@ LiveInterval *LiveIntervalUnion::getOneVReg() const {
// Scan the vector of interfering virtual registers in this union. Assume it's
// quite small.
bool LiveIntervalUnion::Query::isSeenInterference(LiveInterval *VirtReg) const {
return is_contained(InterferingVRegs, VirtReg);
return is_contained(*InterferingVRegs, VirtReg);
}
// Collect virtual registers in this union that interfere with this
@ -126,9 +126,12 @@ bool LiveIntervalUnion::Query::isSeenInterference(LiveInterval *VirtReg) const {
//
unsigned LiveIntervalUnion::Query::
collectInterferingVRegs(unsigned MaxInterferingRegs) {
if (!InterferingVRegs)
InterferingVRegs.emplace();
// Fast path return if we already have the desired information.
if (SeenAllInterferences || InterferingVRegs.size() >= MaxInterferingRegs)
return InterferingVRegs.size();
if (SeenAllInterferences || InterferingVRegs->size() >= MaxInterferingRegs)
return InterferingVRegs->size();
// Set up iterators on the first call.
if (!CheckedFirstInterference) {
@ -157,14 +160,14 @@ collectInterferingVRegs(unsigned MaxInterferingRegs) {
LiveInterval *VReg = LiveUnionI.value();
if (VReg != RecentReg && !isSeenInterference(VReg)) {
RecentReg = VReg;
InterferingVRegs.push_back(VReg);
if (InterferingVRegs.size() >= MaxInterferingRegs)
return InterferingVRegs.size();
InterferingVRegs->push_back(VReg);
if (InterferingVRegs->size() >= MaxInterferingRegs)
return InterferingVRegs->size();
}
// This LiveUnion segment is no longer interesting.
if (!(++LiveUnionI).valid()) {
SeenAllInterferences = true;
return InterferingVRegs.size();
return InterferingVRegs->size();
}
}
@ -185,7 +188,7 @@ collectInterferingVRegs(unsigned MaxInterferingRegs) {
LiveUnionI.advanceTo(LRI->start);
}
SeenAllInterferences = true;
return InterferingVRegs.size();
return InterferingVRegs->size();
}
void LiveIntervalUnion::Array::init(LiveIntervalUnion::Allocator &Alloc,

View File

@ -216,7 +216,21 @@ bool LiveRegMatrix::checkInterference(SlotIndex Start, SlotIndex End,
// Check for interference with that segment
for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
if (query(LR, *Units).checkInterference())
// LR is stack-allocated. LiveRegMatrix caches queries by a key that
// includes the address of the live range. If (for the same reg unit) this
// checkInterference overload is called twice, without any other query()
// calls in between (on heap-allocated LiveRanges) - which would invalidate
// the cached query - the LR address seen the second time may well be the
// same as that seen the first time, while the Start/End/valno may not - yet
// the same cached result would be fetched. To avoid that, we don't cache
// this query.
//
// FIXME: the usability of the Query API needs to be improved to avoid
// subtle bugs due to query identity. Avoiding caching, for example, would
// greatly simplify things.
LiveIntervalUnion::Query Q;
Q.reset(UserTag, LR, Matrix[*Units]);
if (Q.checkInterference())
return true;
}
return false;

View File

@ -471,12 +471,13 @@ private:
bool shouldEvict(LiveInterval &A, bool, LiveInterval &B, bool) const;
bool canEvictInterference(LiveInterval &, MCRegister, bool, EvictionCost &,
const SmallVirtRegSet &) const;
bool canEvictInterferenceInRange(LiveInterval &VirtReg, MCRegister PhysReg,
SlotIndex Start, SlotIndex End,
EvictionCost &MaxCost) const;
bool canEvictInterferenceInRange(const LiveInterval &VirtReg,
MCRegister PhysReg, SlotIndex Start,
SlotIndex End, EvictionCost &MaxCost) const;
MCRegister getCheapestEvicteeWeight(const AllocationOrder &Order,
LiveInterval &VirtReg, SlotIndex Start,
SlotIndex End, float *BestEvictWeight);
const LiveInterval &VirtReg,
SlotIndex Start, SlotIndex End,
float *BestEvictWeight) const;
void evictInterference(LiveInterval &, MCRegister,
SmallVectorImpl<Register> &);
bool mayRecolorAllInterferences(MCRegister PhysReg, LiveInterval &VirtReg,
@ -979,7 +980,7 @@ bool RAGreedy::canEvictInterference(
/// \param MaxCost Only look for cheaper candidates and update with new cost
/// when returning true.
/// \return True when interference can be evicted cheaper than MaxCost.
bool RAGreedy::canEvictInterferenceInRange(LiveInterval &VirtReg,
bool RAGreedy::canEvictInterferenceInRange(const LiveInterval &VirtReg,
MCRegister PhysReg, SlotIndex Start,
SlotIndex End,
EvictionCost &MaxCost) const {
@ -987,6 +988,7 @@ bool RAGreedy::canEvictInterferenceInRange(LiveInterval &VirtReg,
for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, *Units);
Q.collectInterferingVRegs();
// Check if any interfering live range is heavier than MaxWeight.
for (const LiveInterval *Intf : reverse(Q.interferingVRegs())) {
@ -1031,9 +1033,9 @@ bool RAGreedy::canEvictInterferenceInRange(LiveInterval &VirtReg,
/// \return The PhysReg which is the best candidate for eviction and the
/// eviction cost in BestEvictweight
MCRegister RAGreedy::getCheapestEvicteeWeight(const AllocationOrder &Order,
LiveInterval &VirtReg,
const LiveInterval &VirtReg,
SlotIndex Start, SlotIndex End,
float *BestEvictweight) {
float *BestEvictweight) const {
EvictionCost BestEvictCost;
BestEvictCost.setMax();
BestEvictCost.MaxWeight = VirtReg.weight();
@ -1556,25 +1558,9 @@ bool RAGreedy::splitCanCauseLocalSpill(unsigned VirtRegToSplit,
return false;
}
// Check if the local interval will evict a cheaper interval.
float CheapestEvictWeight = 0;
MCRegister FutureEvictedPhysReg = getCheapestEvicteeWeight(
Order, LIS->getInterval(VirtRegToSplit), Cand.Intf.first(),
Cand.Intf.last(), &CheapestEvictWeight);
// Have we found an interval that can be evicted?
if (FutureEvictedPhysReg) {
float splitArtifactWeight =
VRAI->futureWeight(LIS->getInterval(VirtRegToSplit),
Cand.Intf.first().getPrevIndex(), Cand.Intf.last());
// Will the weight of the local interval be higher than the cheapest evictee
// weight? If so it will evict it and will not cause a spill.
if (splitArtifactWeight >= 0 && splitArtifactWeight > CheapestEvictWeight)
return false;
}
// The local interval is not able to find non interferencing assignment and
// not able to evict a less worthy interval, therfore, it can cause a spill.
// The local interval is not able to find a non-interfering assignment
// and is not able to evict a less worthy interval; therefore, it can cause a
// spill.
return true;
}

View File

@ -557,7 +557,7 @@ public:
bool enableEarlyIfConversion() const override;
bool enableAdvancedRASplitCost() const override { return true; }
bool enableAdvancedRASplitCost() const override { return false; }
std::unique_ptr<PBQPRAConstraint> getCustomPBQPConstraints() const override;

View File

@ -941,7 +941,7 @@ public:
return TargetSubtargetInfo::ANTIDEP_CRITICAL;
}
bool enableAdvancedRASplitCost() const override { return true; }
bool enableAdvancedRASplitCost() const override { return false; }
};
} // end namespace llvm

View File

@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64-arm-none-eabi < %s | FileCheck %s
; RUN: llc -consider-local-interval-cost -mtriple=aarch64-arm-none-eabi < %s | FileCheck %s
@A = external dso_local local_unnamed_addr global [8 x [8 x i64]], align 8
@B = external dso_local local_unnamed_addr global [8 x [8 x i64]], align 8

View File

@ -1,4 +1,4 @@
; RUN: llc < %s -march=x86 -regalloc=greedy -stop-after=greedy | FileCheck %s
; RUN: llc -consider-local-interval-cost < %s -march=x86 -regalloc=greedy -stop-after=greedy | FileCheck %s
; Make sure bad eviction sequence doesnt occur
; Fix for bugzilla 26810.

View File

@ -1,4 +1,4 @@
; RUN: llc < %s -march=x86 -regalloc=greedy -stop-after=greedy | FileCheck %s
; RUN: llc -consider-local-interval-cost < %s -march=x86 -regalloc=greedy -stop-after=greedy | FileCheck %s
; Make sure bad eviction sequence doesnt occur
; Part of the fix for bugzilla 26810.

View File

@ -162,9 +162,9 @@ define i64 @mul1(i64 %n, i64* nocapture %z, i64* nocapture %x, i64 %y) nounwind
; X86-NOBMI-NEXT: movl (%esp), %edi # 4-byte Reload
; X86-NOBMI-NEXT: adcl $0, %edi
; X86-NOBMI-NEXT: movl %ebp, %esi
; X86-NOBMI-NEXT: xorl %ebx, %esi
; X86-NOBMI-NEXT: xorl {{[0-9]+}}(%esp), %esi
; X86-NOBMI-NEXT: movl %edi, (%esp) # 4-byte Spill
; X86-NOBMI-NEXT: xorl {{[0-9]+}}(%esp), %edi
; X86-NOBMI-NEXT: xorl %ebx, %edi
; X86-NOBMI-NEXT: orl %esi, %edi
; X86-NOBMI-NEXT: jne .LBB1_2
; X86-NOBMI-NEXT: .LBB1_3: # %for.end

View File

@ -390,25 +390,28 @@ define <1 x i64> @test3(<1 x i64>* %a, <1 x i64>* %b, i32 %count) nounwind {
; X32-NEXT: pushl %ebx
; X32-NEXT: pushl %edi
; X32-NEXT: pushl %esi
; X32-NEXT: cmpl $0, {{[0-9]+}}(%esp)
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: testl %ecx, %ecx
; X32-NEXT: je .LBB3_1
; X32-NEXT: # %bb.2: # %bb26.preheader
; X32-NEXT: movl {{[0-9]+}}(%esp), %esi
; X32-NEXT: movl {{[0-9]+}}(%esp), %edi
; X32-NEXT: xorl %ebx, %ebx
; X32-NEXT: xorl %eax, %eax
; X32-NEXT: xorl %edx, %edx
; X32-NEXT: .p2align 4, 0x90
; X32-NEXT: .LBB3_3: # %bb26
; X32-NEXT: # =>This Inner Loop Header: Depth=1
; X32-NEXT: movl {{[0-9]+}}(%esp), %edi
; X32-NEXT: movl (%edi,%ebx,8), %ebp
; X32-NEXT: movl %ecx, %esi
; X32-NEXT: movl 4(%edi,%ebx,8), %ecx
; X32-NEXT: addl (%esi,%ebx,8), %ebp
; X32-NEXT: adcl 4(%esi,%ebx,8), %ecx
; X32-NEXT: movl {{[0-9]+}}(%esp), %edi
; X32-NEXT: addl (%edi,%ebx,8), %ebp
; X32-NEXT: adcl 4(%edi,%ebx,8), %ecx
; X32-NEXT: addl %ebp, %eax
; X32-NEXT: adcl %ecx, %edx
; X32-NEXT: movl %esi, %ecx
; X32-NEXT: incl %ebx
; X32-NEXT: cmpl {{[0-9]+}}(%esp), %ebx
; X32-NEXT: cmpl %esi, %ebx
; X32-NEXT: jb .LBB3_3
; X32-NEXT: jmp .LBB3_4
; X32-NEXT: .LBB3_1:

View File

@ -450,49 +450,51 @@ define void @bar(i8* %r, i32 %s, i32 %w, i32 %x, i8* %j, i32 %d) nounwind {
; CHECK-NEXT: pushl %edi
; CHECK-NEXT: pushl %esi
; CHECK-NEXT: subl $28, %esp
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %esi
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ebp
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edi
; CHECK-NEXT: movl %ebp, %eax
; CHECK-NEXT: imull %ecx, %eax
; CHECK-NEXT: movl %ebp, %edx
; CHECK-NEXT: imull %eax, %edx
; CHECK-NEXT: cmpl $1, {{[0-9]+}}(%esp)
; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
; CHECK-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
; CHECK-NEXT: je LBB1_19
; CHECK-NEXT: ## %bb.1: ## %bb10.preheader
; CHECK-NEXT: shrl $2, %eax
; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
; CHECK-NEXT: movl %edx, %ecx
; CHECK-NEXT: shrl $2, %ecx
; CHECK-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
; CHECK-NEXT: testl %ebp, %ebp
; CHECK-NEXT: movl %eax, %edi
; CHECK-NEXT: je LBB1_12
; CHECK-NEXT: ## %bb.2: ## %bb.nph9
; CHECK-NEXT: cmpl $0, {{[0-9]+}}(%esp)
; CHECK-NEXT: testl %eax, %eax
; CHECK-NEXT: je LBB1_12
; CHECK-NEXT: ## %bb.3: ## %bb.nph9.split
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: incl %eax
; CHECK-NEXT: xorl %ecx, %ecx
; CHECK-NEXT: .p2align 4, 0x90
; CHECK-NEXT: LBB1_6: ## %bb7.preheader
; CHECK-NEXT: ## =>This Loop Header: Depth=1
; CHECK-NEXT: ## Child Loop BB1_4 Depth 2
; CHECK-NEXT: movl %esi, %edx
; CHECK-NEXT: xorl %esi, %esi
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edi
; CHECK-NEXT: .p2align 4, 0x90
; CHECK-NEXT: LBB1_4: ## %bb6
; CHECK-NEXT: ## Parent Loop BB1_6 Depth=1
; CHECK-NEXT: ## => This Inner Loop Header: Depth=2
; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1
; CHECK-NEXT: movzbl (%eax,%esi,2), %ebx
; CHECK-NEXT: movb %bl, (%edx,%esi)
; CHECK-NEXT: incl %esi
; CHECK-NEXT: cmpl %edi, %esi
; CHECK-NEXT: jb LBB1_4
; CHECK-NEXT: ## %bb.5: ## %bb9
; CHECK-NEXT: ## in Loop: Header=BB1_6 Depth=1
; CHECK-NEXT: ## in Loop: Header=BB1_4 Depth=1
; CHECK-NEXT: movl %edi, %ebx
; CHECK-NEXT: incl %ecx
; CHECK-NEXT: addl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: addl %edi, %edx
; CHECK-NEXT: cmpl %ebp, %ecx
; CHECK-NEXT: jne LBB1_6
; CHECK-NEXT: je LBB1_12
; CHECK-NEXT: ## %bb.6: ## %bb7.preheader
; CHECK-NEXT: ## in Loop: Header=BB1_4 Depth=1
; CHECK-NEXT: xorl %esi, %esi
; CHECK-NEXT: jmp LBB1_4
; CHECK-NEXT: LBB1_12: ## %bb18.loopexit
; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 4-byte Reload
; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Reload
@ -501,10 +503,10 @@ define void @bar(i8* %r, i32 %s, i32 %w, i32 %x, i8* %j, i32 %d) nounwind {
; CHECK-NEXT: cmpl $1, %ebp
; CHECK-NEXT: jbe LBB1_13
; CHECK-NEXT: ## %bb.7: ## %bb.nph5
; CHECK-NEXT: cmpl $2, {{[0-9]+}}(%esp)
; CHECK-NEXT: cmpl $2, %edi
; CHECK-NEXT: jb LBB1_13
; CHECK-NEXT: ## %bb.8: ## %bb.nph5.split
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ebp
; CHECK-NEXT: movl %edi, %ebp
; CHECK-NEXT: shrl %ebp
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: shrl %eax
@ -518,14 +520,14 @@ define void @bar(i8* %r, i32 %s, i32 %w, i32 %x, i8* %j, i32 %d) nounwind {
; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx ## 4-byte Reload
; CHECK-NEXT: addl %edx, %eax
; CHECK-NEXT: xorl %edx, %edx
; CHECK-NEXT: xorl %edi, %edi
; CHECK-NEXT: xorl %ebx, %ebx
; CHECK-NEXT: .p2align 4, 0x90
; CHECK-NEXT: LBB1_9: ## %bb13
; CHECK-NEXT: ## =>This Loop Header: Depth=1
; CHECK-NEXT: ## Child Loop BB1_10 Depth 2
; CHECK-NEXT: movl %edi, %ebx
; CHECK-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
; CHECK-NEXT: andl $1, %ebx
; CHECK-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
; CHECK-NEXT: movl %edx, (%esp) ## 4-byte Spill
; CHECK-NEXT: addl %edx, %ebx
; CHECK-NEXT: imull {{[0-9]+}}(%esp), %ebx
; CHECK-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx ## 4-byte Folded Reload
@ -543,26 +545,27 @@ define void @bar(i8* %r, i32 %s, i32 %w, i32 %x, i8* %j, i32 %d) nounwind {
; CHECK-NEXT: jb LBB1_10
; CHECK-NEXT: ## %bb.11: ## %bb17
; CHECK-NEXT: ## in Loop: Header=BB1_9 Depth=1
; CHECK-NEXT: incl %edi
; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx ## 4-byte Reload
; CHECK-NEXT: incl %ebx
; CHECK-NEXT: addl %ebp, %ecx
; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx ## 4-byte Reload
; CHECK-NEXT: movl (%esp), %edx ## 4-byte Reload
; CHECK-NEXT: addl $2, %edx
; CHECK-NEXT: addl %ebp, %eax
; CHECK-NEXT: cmpl {{[-0-9]+}}(%e{{[sb]}}p), %edi ## 4-byte Folded Reload
; CHECK-NEXT: cmpl {{[-0-9]+}}(%e{{[sb]}}p), %ebx ## 4-byte Folded Reload
; CHECK-NEXT: jb LBB1_9
; CHECK-NEXT: LBB1_13: ## %bb20
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx
; CHECK-NEXT: cmpl $1, %edx
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %esi
; CHECK-NEXT: cmpl $1, %esi
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ebp
; CHECK-NEXT: movl %edi, %eax
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edi
; CHECK-NEXT: je LBB1_19
; CHECK-NEXT: ## %bb.14: ## %bb20
; CHECK-NEXT: cmpl $3, %edx
; CHECK-NEXT: cmpl $3, %esi
; CHECK-NEXT: jne LBB1_24
; CHECK-NEXT: ## %bb.15: ## %bb22
; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 4-byte Reload
; CHECK-NEXT: addl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill
; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx ## 4-byte Reload
; CHECK-NEXT: addl %edx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill
; CHECK-NEXT: testl %ebp, %ebp
; CHECK-NEXT: je LBB1_18
; CHECK-NEXT: ## %bb.16: ## %bb.nph
@ -570,9 +573,11 @@ define void @bar(i8* %r, i32 %s, i32 %w, i32 %x, i8* %j, i32 %d) nounwind {
; CHECK-NEXT: leal 15(%ebp), %eax
; CHECK-NEXT: andl $-16, %eax
; CHECK-NEXT: imull {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: leal 15(%ecx), %ebx
; CHECK-NEXT: andl $-16, %ebx
; CHECK-NEXT: addl %eax, %edi
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx
; CHECK-NEXT: addl $15, %edx
; CHECK-NEXT: andl $-16, %edx
; CHECK-NEXT: movl %edx, (%esp) ## 4-byte Spill
; CHECK-NEXT: addl %eax, %ecx
; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 4-byte Reload
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx
; CHECK-NEXT: leal (%edx,%eax), %ebp
@ -580,14 +585,16 @@ define void @bar(i8* %r, i32 %s, i32 %w, i32 %x, i8* %j, i32 %d) nounwind {
; CHECK-NEXT: LBB1_17: ## %bb23
; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1
; CHECK-NEXT: subl $4, %esp
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ebx
; CHECK-NEXT: pushl %ebx
; CHECK-NEXT: pushl %ecx
; CHECK-NEXT: pushl %edi
; CHECK-NEXT: pushl %ebp
; CHECK-NEXT: movl %ecx, %edi
; CHECK-NEXT: calll _memcpy
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
; CHECK-NEXT: movl %edi, %ecx
; CHECK-NEXT: addl $16, %esp
; CHECK-NEXT: addl %ecx, %ebp
; CHECK-NEXT: addl %ebx, %edi
; CHECK-NEXT: addl %ebx, %ebp
; CHECK-NEXT: addl (%esp), %ecx ## 4-byte Folded Reload
; CHECK-NEXT: decl %esi
; CHECK-NEXT: jne LBB1_17
; CHECK-NEXT: LBB1_18: ## %bb26
@ -607,21 +614,24 @@ define void @bar(i8* %r, i32 %s, i32 %w, i32 %x, i8* %j, i32 %d) nounwind {
; CHECK-NEXT: je LBB1_22
; CHECK-NEXT: ## %bb.20: ## %bb.nph11
; CHECK-NEXT: movl %ebp, %esi
; CHECK-NEXT: leal 15(%ecx), %ebx
; CHECK-NEXT: andl $-16, %ebx
; CHECK-NEXT: movl %eax, %edi
; CHECK-NEXT: addl $15, %eax
; CHECK-NEXT: andl $-16, %eax
; CHECK-NEXT: movl %eax, (%esp) ## 4-byte Spill
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ebp
; CHECK-NEXT: .p2align 4, 0x90
; CHECK-NEXT: LBB1_21: ## %bb30
; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1
; CHECK-NEXT: subl $4, %esp
; CHECK-NEXT: pushl %ecx
; CHECK-NEXT: pushl %edi
; CHECK-NEXT: pushl %ecx
; CHECK-NEXT: pushl %ebp
; CHECK-NEXT: movl %ecx, %ebx
; CHECK-NEXT: calll _memcpy
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
; CHECK-NEXT: movl %ebx, %ecx
; CHECK-NEXT: addl $16, %esp
; CHECK-NEXT: addl %ecx, %ebp
; CHECK-NEXT: addl %ebx, %edi
; CHECK-NEXT: addl %edi, %ebp
; CHECK-NEXT: addl (%esp), %ecx ## 4-byte Folded Reload
; CHECK-NEXT: decl %esi
; CHECK-NEXT: jne LBB1_21
; CHECK-NEXT: LBB1_22: ## %bb33