forked from OSchip/llvm-project
In the pre-RA scheduler, maintain cmp+br proximity.
This is done by pushing physical register definitions close to their use, which happens to handle flag definitions if they're not glued to the branch. This seems to be generally a good thing though, so I didn't need to add a target hook yet. The primary motivation is to generate code closer to what people expect and rule out missed opportunity from enabling macro-op fusion. As a side benefit, we get several 2-5% gains on x86 benchmarks. There is one regression: SingleSource/Benchmarks/Shootout/lists slows down be -10%. But this is an independent scheduler bug that will be tracked separately. See rdar://problem/9283108. Incidentally, pre-RA scheduling is only half the solution. Fixing the later passes is tracked by: <rdar://problem/8932804> [pre-RA-sched] on x86, attempt to schedule CMP/TEST adjacent with condition jump Fixes: <rdar://problem/9262453> Scheduler unnecessary break of cmp/jump fusion llvm-svn: 129508
This commit is contained in:
parent
d175c98d41
commit
bfbd972b1f
|
@ -260,6 +260,7 @@ namespace llvm {
|
||||||
bool isAvailable : 1; // True once available.
|
bool isAvailable : 1; // True once available.
|
||||||
bool isScheduled : 1; // True once scheduled.
|
bool isScheduled : 1; // True once scheduled.
|
||||||
bool isScheduleHigh : 1; // True if preferable to schedule high.
|
bool isScheduleHigh : 1; // True if preferable to schedule high.
|
||||||
|
bool isScheduleLow : 1; // True if preferable to schedule low.
|
||||||
bool isCloned : 1; // True if this node has been cloned.
|
bool isCloned : 1; // True if this node has been cloned.
|
||||||
Sched::Preference SchedulingPref; // Scheduling preference.
|
Sched::Preference SchedulingPref; // Scheduling preference.
|
||||||
|
|
||||||
|
@ -282,7 +283,7 @@ namespace llvm {
|
||||||
isVRegCycle(false), isCall(false), isTwoAddress(false),
|
isVRegCycle(false), isCall(false), isTwoAddress(false),
|
||||||
isCommutable(false), hasPhysRegDefs(false), hasPhysRegClobbers(false),
|
isCommutable(false), hasPhysRegDefs(false), hasPhysRegClobbers(false),
|
||||||
isPending(false), isAvailable(false), isScheduled(false),
|
isPending(false), isAvailable(false), isScheduled(false),
|
||||||
isScheduleHigh(false), isCloned(false),
|
isScheduleHigh(false), isScheduleLow(false), isCloned(false),
|
||||||
SchedulingPref(Sched::None),
|
SchedulingPref(Sched::None),
|
||||||
isDepthCurrent(false), isHeightCurrent(false), Depth(0), Height(0),
|
isDepthCurrent(false), isHeightCurrent(false), Depth(0), Height(0),
|
||||||
CopyDstRC(NULL), CopySrcRC(NULL) {}
|
CopyDstRC(NULL), CopySrcRC(NULL) {}
|
||||||
|
@ -296,7 +297,7 @@ namespace llvm {
|
||||||
isVRegCycle(false), isCall(false), isTwoAddress(false),
|
isVRegCycle(false), isCall(false), isTwoAddress(false),
|
||||||
isCommutable(false), hasPhysRegDefs(false), hasPhysRegClobbers(false),
|
isCommutable(false), hasPhysRegDefs(false), hasPhysRegClobbers(false),
|
||||||
isPending(false), isAvailable(false), isScheduled(false),
|
isPending(false), isAvailable(false), isScheduled(false),
|
||||||
isScheduleHigh(false), isCloned(false),
|
isScheduleHigh(false), isScheduleLow(false), isCloned(false),
|
||||||
SchedulingPref(Sched::None),
|
SchedulingPref(Sched::None),
|
||||||
isDepthCurrent(false), isHeightCurrent(false), Depth(0), Height(0),
|
isDepthCurrent(false), isHeightCurrent(false), Depth(0), Height(0),
|
||||||
CopyDstRC(NULL), CopySrcRC(NULL) {}
|
CopyDstRC(NULL), CopySrcRC(NULL) {}
|
||||||
|
@ -309,7 +310,7 @@ namespace llvm {
|
||||||
isVRegCycle(false), isCall(false), isTwoAddress(false),
|
isVRegCycle(false), isCall(false), isTwoAddress(false),
|
||||||
isCommutable(false), hasPhysRegDefs(false), hasPhysRegClobbers(false),
|
isCommutable(false), hasPhysRegDefs(false), hasPhysRegClobbers(false),
|
||||||
isPending(false), isAvailable(false), isScheduled(false),
|
isPending(false), isAvailable(false), isScheduled(false),
|
||||||
isScheduleHigh(false), isCloned(false),
|
isScheduleHigh(false), isScheduleLow(false), isCloned(false),
|
||||||
SchedulingPref(Sched::None),
|
SchedulingPref(Sched::None),
|
||||||
isDepthCurrent(false), isHeightCurrent(false), Depth(0), Height(0),
|
isDepthCurrent(false), isHeightCurrent(false), Depth(0), Height(0),
|
||||||
CopyDstRC(NULL), CopySrcRC(NULL) {}
|
CopyDstRC(NULL), CopySrcRC(NULL) {}
|
||||||
|
|
|
@ -71,6 +71,7 @@ static cl::opt<bool> DisableSchedCycles(
|
||||||
cl::desc("Disable cycle-level precision during preRA scheduling"));
|
cl::desc("Disable cycle-level precision during preRA scheduling"));
|
||||||
|
|
||||||
// Temporary sched=list-ilp flags until the heuristics are robust.
|
// Temporary sched=list-ilp flags until the heuristics are robust.
|
||||||
|
// Some options are also available under sched=list-hybrid.
|
||||||
static cl::opt<bool> DisableSchedRegPressure(
|
static cl::opt<bool> DisableSchedRegPressure(
|
||||||
"disable-sched-reg-pressure", cl::Hidden, cl::init(false),
|
"disable-sched-reg-pressure", cl::Hidden, cl::init(false),
|
||||||
cl::desc("Disable regpressure priority in sched=list-ilp"));
|
cl::desc("Disable regpressure priority in sched=list-ilp"));
|
||||||
|
@ -80,6 +81,9 @@ static cl::opt<bool> DisableSchedLiveUses(
|
||||||
static cl::opt<bool> DisableSchedVRegCycle(
|
static cl::opt<bool> DisableSchedVRegCycle(
|
||||||
"disable-sched-vrcycle", cl::Hidden, cl::init(false),
|
"disable-sched-vrcycle", cl::Hidden, cl::init(false),
|
||||||
cl::desc("Disable virtual register cycle interference checks"));
|
cl::desc("Disable virtual register cycle interference checks"));
|
||||||
|
static cl::opt<bool> DisableSchedPhysRegJoin(
|
||||||
|
"disable-sched-physreg-join", cl::Hidden, cl::init(false),
|
||||||
|
cl::desc("Disable physreg def-use affinity"));
|
||||||
static cl::opt<bool> DisableSchedStalls(
|
static cl::opt<bool> DisableSchedStalls(
|
||||||
"disable-sched-stalls", cl::Hidden, cl::init(true),
|
"disable-sched-stalls", cl::Hidden, cl::init(true),
|
||||||
cl::desc("Disable no-stall priority in sched=list-ilp"));
|
cl::desc("Disable no-stall priority in sched=list-ilp"));
|
||||||
|
@ -1638,6 +1642,20 @@ ILPBURRPriorityQueue;
|
||||||
// Static Node Priority for Register Pressure Reduction
|
// Static Node Priority for Register Pressure Reduction
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
||||||
|
// Check for special nodes that bypass scheduling heuristics.
|
||||||
|
// Currently this pushes TokenFactor nodes down, but may be used for other
|
||||||
|
// pseudo-ops as well.
|
||||||
|
//
|
||||||
|
// Return -1 to schedule right above left, 1 for left above right.
|
||||||
|
// Return 0 if no bias exists.
|
||||||
|
static int checkSpecialNodes(const SUnit *left, const SUnit *right) {
|
||||||
|
bool LSchedLow = left->isScheduleLow;
|
||||||
|
bool RSchedLow = right->isScheduleLow;
|
||||||
|
if (LSchedLow != RSchedLow)
|
||||||
|
return LSchedLow < RSchedLow ? 1 : -1;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
/// CalcNodeSethiUllmanNumber - Compute Sethi Ullman number.
|
/// CalcNodeSethiUllmanNumber - Compute Sethi Ullman number.
|
||||||
/// Smaller number is the higher priority.
|
/// Smaller number is the higher priority.
|
||||||
static unsigned
|
static unsigned
|
||||||
|
@ -2198,25 +2216,32 @@ static int BUCompareLatency(SUnit *left, SUnit *right, bool checkPref,
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool BURRSort(SUnit *left, SUnit *right, RegReductionPQBase *SPQ) {
|
static bool BURRSort(SUnit *left, SUnit *right, RegReductionPQBase *SPQ) {
|
||||||
|
// Schedule physical register definitions close to their use. This is
|
||||||
|
// motivated by microarchitectures that can fuse cmp+jump macro-ops. But as
|
||||||
|
// long as shortening physreg live ranges is generally good, we can defer
|
||||||
|
// creating a subtarget hook.
|
||||||
|
if (!DisableSchedPhysRegJoin) {
|
||||||
|
bool LHasPhysReg = left->hasPhysRegDefs;
|
||||||
|
bool RHasPhysReg = right->hasPhysRegDefs;
|
||||||
|
if (LHasPhysReg != RHasPhysReg) {
|
||||||
|
DEBUG(++FactorCount[FactRegUses]);
|
||||||
|
#ifndef NDEBUG
|
||||||
|
const char *PhysRegMsg[] = {" has no physreg", " defines a physreg"};
|
||||||
|
#endif
|
||||||
|
DEBUG(dbgs() << " SU (" << left->NodeNum << ") "
|
||||||
|
<< PhysRegMsg[LHasPhysReg] << " SU(" << right->NodeNum << ") "
|
||||||
|
<< PhysRegMsg[RHasPhysReg] << "\n");
|
||||||
|
return LHasPhysReg < RHasPhysReg;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Prioritize by Seith-Ulmann number and push CopyToReg nodes down.
|
||||||
unsigned LPriority = SPQ->getNodePriority(left);
|
unsigned LPriority = SPQ->getNodePriority(left);
|
||||||
unsigned RPriority = SPQ->getNodePriority(right);
|
unsigned RPriority = SPQ->getNodePriority(right);
|
||||||
if (LPriority != RPriority) {
|
if (LPriority != RPriority) {
|
||||||
DEBUG(++FactorCount[FactStatic]);
|
DEBUG(++FactorCount[FactStatic]);
|
||||||
return LPriority > RPriority;
|
return LPriority > RPriority;
|
||||||
}
|
}
|
||||||
else if(LPriority == 0) {
|
|
||||||
// Schedule zero-latency TokenFactor below any other special
|
|
||||||
// nodes. The alternative may be to avoid artificially boosting the
|
|
||||||
// TokenFactor's height when it is scheduled, but we currently rely on an
|
|
||||||
// instruction's final height to equal the cycle in which it is scheduled,
|
|
||||||
// so heights are monotonically increasing.
|
|
||||||
unsigned LOpc = left->getNode() ? left->getNode()->getOpcode() : 0;
|
|
||||||
unsigned ROpc = right->getNode() ? right->getNode()->getOpcode() : 0;
|
|
||||||
if (LOpc == ISD::TokenFactor)
|
|
||||||
return false;
|
|
||||||
if (ROpc == ISD::TokenFactor)
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Try schedule def + use closer when Sethi-Ullman numbers are the same.
|
// Try schedule def + use closer when Sethi-Ullman numbers are the same.
|
||||||
// e.g.
|
// e.g.
|
||||||
|
@ -2275,11 +2300,17 @@ static bool BURRSort(SUnit *left, SUnit *right, RegReductionPQBase *SPQ) {
|
||||||
|
|
||||||
// Bottom up
|
// Bottom up
|
||||||
bool bu_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
|
bool bu_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
|
||||||
|
if (int res = checkSpecialNodes(left, right))
|
||||||
|
return res > 0;
|
||||||
|
|
||||||
return BURRSort(left, right, SPQ);
|
return BURRSort(left, right, SPQ);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Source order, otherwise bottom up.
|
// Source order, otherwise bottom up.
|
||||||
bool src_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
|
bool src_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
|
||||||
|
if (int res = checkSpecialNodes(left, right))
|
||||||
|
return res > 0;
|
||||||
|
|
||||||
unsigned LOrder = SPQ->getNodeOrdering(left);
|
unsigned LOrder = SPQ->getNodeOrdering(left);
|
||||||
unsigned ROrder = SPQ->getNodeOrdering(right);
|
unsigned ROrder = SPQ->getNodeOrdering(right);
|
||||||
|
|
||||||
|
@ -2311,6 +2342,9 @@ bool hybrid_ls_rr_sort::isReady(SUnit *SU, unsigned CurCycle) const {
|
||||||
|
|
||||||
// Return true if right should be scheduled with higher priority than left.
|
// Return true if right should be scheduled with higher priority than left.
|
||||||
bool hybrid_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
|
bool hybrid_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
|
||||||
|
if (int res = checkSpecialNodes(left, right))
|
||||||
|
return res > 0;
|
||||||
|
|
||||||
if (left->isCall || right->isCall)
|
if (left->isCall || right->isCall)
|
||||||
// No way to compute latency of calls.
|
// No way to compute latency of calls.
|
||||||
return BURRSort(left, right, SPQ);
|
return BURRSort(left, right, SPQ);
|
||||||
|
@ -2376,6 +2410,9 @@ static bool canEnableCoalescing(SUnit *SU) {
|
||||||
// list-ilp is currently an experimental scheduler that allows various
|
// list-ilp is currently an experimental scheduler that allows various
|
||||||
// heuristics to be enabled prior to the normal register reduction logic.
|
// heuristics to be enabled prior to the normal register reduction logic.
|
||||||
bool ilp_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
|
bool ilp_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
|
||||||
|
if (int res = checkSpecialNodes(left, right))
|
||||||
|
return res > 0;
|
||||||
|
|
||||||
if (left->isCall || right->isCall)
|
if (left->isCall || right->isCall)
|
||||||
// No way to compute latency of calls.
|
// No way to compute latency of calls.
|
||||||
return BURRSort(left, right, SPQ);
|
return BURRSort(left, right, SPQ);
|
||||||
|
@ -2734,6 +2771,9 @@ static unsigned LimitedSumOfUnscheduledPredsOfSuccs(const SUnit *SU,
|
||||||
|
|
||||||
// Top down
|
// Top down
|
||||||
bool td_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const {
|
bool td_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const {
|
||||||
|
if (int res = checkSpecialNodes(left, right))
|
||||||
|
return res < 0;
|
||||||
|
|
||||||
unsigned LPriority = SPQ->getNodePriority(left);
|
unsigned LPriority = SPQ->getNodePriority(left);
|
||||||
unsigned RPriority = SPQ->getNodePriority(right);
|
unsigned RPriority = SPQ->getNodePriority(right);
|
||||||
bool LIsTarget = left->getNode() && left->getNode()->isMachineOpcode();
|
bool LIsTarget = left->getNode() && left->getNode()->isMachineOpcode();
|
||||||
|
|
|
@ -87,6 +87,8 @@ SUnit *ScheduleDAGSDNodes::Clone(SUnit *Old) {
|
||||||
SU->isCommutable = Old->isCommutable;
|
SU->isCommutable = Old->isCommutable;
|
||||||
SU->hasPhysRegDefs = Old->hasPhysRegDefs;
|
SU->hasPhysRegDefs = Old->hasPhysRegDefs;
|
||||||
SU->hasPhysRegClobbers = Old->hasPhysRegClobbers;
|
SU->hasPhysRegClobbers = Old->hasPhysRegClobbers;
|
||||||
|
SU->isScheduleHigh = Old->isScheduleHigh;
|
||||||
|
SU->isScheduleLow = Old->isScheduleLow;
|
||||||
SU->SchedulingPref = Old->SchedulingPref;
|
SU->SchedulingPref = Old->SchedulingPref;
|
||||||
Old->isCloned = true;
|
Old->isCloned = true;
|
||||||
return SU;
|
return SU;
|
||||||
|
@ -335,6 +337,12 @@ void ScheduleDAGSDNodes::BuildSchedUnits() {
|
||||||
if (!HasGlueUse) break;
|
if (!HasGlueUse) break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Schedule zero-latency TokenFactor below any nodes that may increase the
|
||||||
|
// schedule height. Otherwise, ancestors of the TokenFactor may appear to
|
||||||
|
// have false stalls.
|
||||||
|
if (NI->getOpcode() == ISD::TokenFactor)
|
||||||
|
NodeSUnit->isScheduleLow = true;
|
||||||
|
|
||||||
// If there are glue operands involved, N is now the bottom-most node
|
// If there are glue operands involved, N is now the bottom-most node
|
||||||
// of the sequence of nodes that are glued together.
|
// of the sequence of nodes that are glued together.
|
||||||
// Update the SUnit.
|
// Update the SUnit.
|
||||||
|
|
|
@ -0,0 +1,65 @@
|
||||||
|
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=yonah | FileCheck %s
|
||||||
|
; Reduced from JavaScriptCore
|
||||||
|
|
||||||
|
%"class.JSC::CodeLocationCall" = type { [8 x i8] }
|
||||||
|
%"class.JSC::JSGlobalData" = type { [4 x i8] }
|
||||||
|
%"class.JSC::FunctionPtr" = type { i8* }
|
||||||
|
%"class.JSC::Structure" = type { [4 x i8] }
|
||||||
|
%"class.JSC::UString" = type { i8* }
|
||||||
|
%"class.JSC::JSString" = type { [16 x i8], i32, %"class.JSC::UString", i32 }
|
||||||
|
|
||||||
|
declare hidden fastcc void @_ZN3JSCL23returnToThrowTrampolineEPNS_12JSGlobalDataENS_16ReturnAddressPtrERS2_(%"class.JSC::JSGlobalData"* nocapture, i8*, %"class.JSC::FunctionPtr"* nocapture) nounwind noinline ssp
|
||||||
|
|
||||||
|
; Avoid hoisting the test above loads or copies
|
||||||
|
; CHECK: %entry
|
||||||
|
; CHECK: cmpq
|
||||||
|
; CHECK-NOT: mov
|
||||||
|
; CHECK: jb
|
||||||
|
define i32 @cti_op_eq(i8** nocapture %args) nounwind ssp {
|
||||||
|
entry:
|
||||||
|
%0 = load i8** null, align 8
|
||||||
|
%tmp13 = bitcast i8* %0 to %"class.JSC::CodeLocationCall"*
|
||||||
|
%tobool.i.i.i = icmp ugt i8* undef, inttoptr (i64 281474976710655 to i8*)
|
||||||
|
%or.cond.i = and i1 %tobool.i.i.i, undef
|
||||||
|
br i1 %or.cond.i, label %if.then.i, label %if.end.i
|
||||||
|
|
||||||
|
if.then.i: ; preds = %entry
|
||||||
|
br i1 undef, label %if.then.i.i.i, label %_ZN3JSC7JSValue19equalSlowCaseInlineEPNS_9ExecStateES0_S0_.exit
|
||||||
|
|
||||||
|
if.then.i.i.i: ; preds = %if.then.i
|
||||||
|
%conv.i.i.i.i = trunc i64 undef to i32
|
||||||
|
br label %_ZN3JSC7JSValue19equalSlowCaseInlineEPNS_9ExecStateES0_S0_.exit
|
||||||
|
|
||||||
|
if.end.i: ; preds = %entry
|
||||||
|
br i1 undef, label %land.rhs.i121.i, label %_ZNK3JSC7JSValue8isStringEv.exit122.i
|
||||||
|
|
||||||
|
land.rhs.i121.i: ; preds = %if.end.i
|
||||||
|
%tmp.i.i117.i = load %"class.JSC::Structure"** undef, align 8
|
||||||
|
br label %_ZNK3JSC7JSValue8isStringEv.exit122.i
|
||||||
|
|
||||||
|
_ZNK3JSC7JSValue8isStringEv.exit122.i: ; preds = %land.rhs.i121.i, %if.end.i
|
||||||
|
%brmerge.i = or i1 undef, false
|
||||||
|
%or.cond = or i1 false, %brmerge.i
|
||||||
|
br i1 %or.cond, label %_ZN3JSC7JSValue19equalSlowCaseInlineEPNS_9ExecStateES0_S0_.exit, label %if.then.i92.i
|
||||||
|
|
||||||
|
if.then.i92.i: ; preds = %_ZNK3JSC7JSValue8isStringEv.exit122.i
|
||||||
|
tail call void @_ZNK3JSC8JSString11resolveRopeEPNS_9ExecStateE(%"class.JSC::JSString"* undef, %"class.JSC::CodeLocationCall"* %tmp13) nounwind
|
||||||
|
unreachable
|
||||||
|
|
||||||
|
_ZN3JSC7JSValue19equalSlowCaseInlineEPNS_9ExecStateES0_S0_.exit: ; preds = %_ZNK3JSC7JSValue8isStringEv.exit122.i, %if.then.i.i.i, %if.then.i
|
||||||
|
|
||||||
|
%1 = load i8** undef, align 8
|
||||||
|
br i1 undef, label %do.end39, label %do.body27
|
||||||
|
|
||||||
|
do.body27: ; preds = %_ZN3JSC7JSValue19equalSlowCaseInlineEPNS_9ExecStateES0_S0_.exit
|
||||||
|
%tmp30 = bitcast i8* %1 to %"class.JSC::JSGlobalData"*
|
||||||
|
%2 = getelementptr inbounds i8** %args, i64 -1
|
||||||
|
%3 = bitcast i8** %2 to %"class.JSC::FunctionPtr"*
|
||||||
|
tail call fastcc void @_ZN3JSCL23returnToThrowTrampolineEPNS_12JSGlobalDataENS_16ReturnAddressPtrERS2_(%"class.JSC::JSGlobalData"* %tmp30, i8* undef, %"class.JSC::FunctionPtr"* %3)
|
||||||
|
unreachable
|
||||||
|
|
||||||
|
do.end39: ; preds = %_ZN3JSC7JSValue19equalSlowCaseInlineEPNS_9ExecStateES0_S0_.exit
|
||||||
|
ret i32 undef
|
||||||
|
}
|
||||||
|
|
||||||
|
declare void @_ZNK3JSC8JSString11resolveRopeEPNS_9ExecStateE(%"class.JSC::JSString"*, %"class.JSC::CodeLocationCall"*)
|
|
@ -1,4 +1,3 @@
|
||||||
; XFAIL: *
|
|
||||||
; RUN: llc -march=x86-64 < %s | FileCheck %s
|
; RUN: llc -march=x86-64 < %s | FileCheck %s
|
||||||
|
|
||||||
; CHECK: decq
|
; CHECK: decq
|
||||||
|
|
|
@ -18,7 +18,8 @@ forcond.preheader: ; preds = %entry
|
||||||
; CHECK: movl $1
|
; CHECK: movl $1
|
||||||
; CHECK-NOT: xorl
|
; CHECK-NOT: xorl
|
||||||
; CHECK-NOT: movl
|
; CHECK-NOT: movl
|
||||||
; CHECK-NEXT: je
|
; CHECK-NOT: LBB
|
||||||
|
; CHECK: je
|
||||||
|
|
||||||
ifthen: ; preds = %entry
|
ifthen: ; preds = %entry
|
||||||
ret i32 0
|
ret i32 0
|
||||||
|
|
|
@ -109,15 +109,15 @@ altret:
|
||||||
|
|
||||||
; CHECK: dont_merge_oddly:
|
; CHECK: dont_merge_oddly:
|
||||||
; CHECK-NOT: ret
|
; CHECK-NOT: ret
|
||||||
; CHECK: ucomiss %xmm1, %xmm2
|
; CHECK: ucomiss %xmm{{[0-2]}}, %xmm{{[0-2]}}
|
||||||
; CHECK-NEXT: jbe .LBB2_3
|
; CHECK-NEXT: jbe .LBB2_3
|
||||||
; CHECK-NEXT: ucomiss %xmm0, %xmm1
|
; CHECK-NEXT: ucomiss %xmm{{[0-2]}}, %xmm{{[0-2]}}
|
||||||
; CHECK-NEXT: ja .LBB2_4
|
; CHECK-NEXT: ja .LBB2_4
|
||||||
; CHECK-NEXT: .LBB2_2:
|
; CHECK-NEXT: .LBB2_2:
|
||||||
; CHECK-NEXT: movb $1, %al
|
; CHECK-NEXT: movb $1, %al
|
||||||
; CHECK-NEXT: ret
|
; CHECK-NEXT: ret
|
||||||
; CHECK-NEXT: .LBB2_3:
|
; CHECK-NEXT: .LBB2_3:
|
||||||
; CHECK-NEXT: ucomiss %xmm0, %xmm2
|
; CHECK-NEXT: ucomiss %xmm{{[0-2]}}, %xmm{{[0-2]}}
|
||||||
; CHECK-NEXT: jbe .LBB2_2
|
; CHECK-NEXT: jbe .LBB2_2
|
||||||
; CHECK-NEXT: .LBB2_4:
|
; CHECK-NEXT: .LBB2_4:
|
||||||
; CHECK-NEXT: xorb %al, %al
|
; CHECK-NEXT: xorb %al, %al
|
||||||
|
|
|
@ -2,10 +2,10 @@
|
||||||
; rdar://5752025
|
; rdar://5752025
|
||||||
|
|
||||||
; We want:
|
; We want:
|
||||||
; CHECK: movl 4(%esp), %ecx
|
; CHECK: movl $42, %ecx
|
||||||
; CHECK-NEXT: andl $15, %ecx
|
; CHECK-NEXT: movl 4(%esp), %eax
|
||||||
; CHECK-NEXT: movl $42, %eax
|
; CHECK-NEXT: andl $15, %eax
|
||||||
; CHECK-NEXT: cmovel %ecx, %eax
|
; CHECK-NEXT: cmovnel %ecx, %eax
|
||||||
; CHECK-NEXT: ret
|
; CHECK-NEXT: ret
|
||||||
;
|
;
|
||||||
; We don't want:
|
; We don't want:
|
||||||
|
|
Loading…
Reference in New Issue