forked from OSchip/llvm-project
[x86] use more shift or LEA for select-of-constants (2nd try)
The previous rev (r310208) failed to account for overflow when subtracting the constants to see if they're suitable for shift/lea. This version adds a check for that, and more tests were added in r310490. We can convert any select-of-constants to math ops: http://rise4fun.com/Alive/d7d For this patch, I'm enhancing an existing x86 transform that uses fake multiplies (they always become shl/lea) to avoid cmov or branching. The current code misses cases where we have a negative constant and a positive constant, so this is just trying to plug that hole. The DAGCombiner diff prevents us from hitting a terrible inefficiency: we can start with a select in IR, create a select DAG node, convert it into a sext, convert it back into a select, and then lower it to sext machine code. Some notes about the test diffs: 1. 2010-08-04-MaskedSignedCompare.ll - We were creating control flow that didn't exist in the IR. 2. memcmp.ll - Choose -1 or 1 is the case that got me looking at this again. We could avoid the push/pop in some cases if we used 'movzbl %al' instead of an xor on a different reg? That's a post-DAG problem though. 3. mul-constant-result.ll - The trade-off between sbb+not vs. setne+neg could be addressed if that's a regression, but those would always be nearly equivalent. 4. pr22338.ll and sext-i1.ll - These tests have undef operands, so we don't actually care about these diffs. 5. sbb.ll - This shows a win for what is likely a common case: choose -1 or 0. 6. select.ll - There's another borderline case here: cmp+sbb+or vs. test+set+lea? Also, sbb+not vs. setae+neg shows up again. 7. select_const.ll - These are motivating cases for the enhancement; replace cmov with cheaper ops. Assembly differences between movzbl and xor to avoid a partial reg stall are caused later by the X86 Fixup SetCC pass. Differential Revision: https://reviews.llvm.org/D35340 llvm-svn: 310717
This commit is contained in:
parent
1fb1ce0c87
commit
169dae70a6
|
@ -7396,7 +7396,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
|
||||||
SimplifySelectCC(DL, N00, N01, ExtTrueVal, Zero, CC, true))
|
SimplifySelectCC(DL, N00, N01, ExtTrueVal, Zero, CC, true))
|
||||||
return SCC;
|
return SCC;
|
||||||
|
|
||||||
if (!VT.isVector()) {
|
if (!VT.isVector() && !TLI.convertSelectOfConstantsToMath()) {
|
||||||
EVT SetCCVT = getSetCCResultType(N00VT);
|
EVT SetCCVT = getSetCCResultType(N00VT);
|
||||||
// Don't do this transform for i1 because there's a select transform
|
// Don't do this transform for i1 because there's a select transform
|
||||||
// that would reverse it.
|
// that would reverse it.
|
||||||
|
|
|
@ -30081,78 +30081,52 @@ static SDValue combineSelectOfTwoConstants(SDNode *N, SelectionDAG &DAG) {
|
||||||
return SDValue();
|
return SDValue();
|
||||||
|
|
||||||
// Don't do this for crazy integer types.
|
// Don't do this for crazy integer types.
|
||||||
if (!DAG.getTargetLoweringInfo().isTypeLegal(LHS.getValueType()))
|
EVT VT = N->getValueType(0);
|
||||||
|
if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
|
||||||
return SDValue();
|
return SDValue();
|
||||||
|
|
||||||
// If this is efficiently invertible, canonicalize the LHSC/RHSC values
|
// We're going to use the condition bit in math or logic ops. We could allow
|
||||||
// so that TrueC (the true value) is larger than FalseC.
|
// this with a wider condition value (post-legalization it becomes an i8),
|
||||||
bool NeedsCondInvert = false;
|
// but if nothing is creating selects that late, it doesn't matter.
|
||||||
if (TrueC->getAPIntValue().ult(FalseC->getAPIntValue()) &&
|
if (Cond.getValueType() != MVT::i1)
|
||||||
// Efficiently invertible.
|
return SDValue();
|
||||||
(Cond.getOpcode() == ISD::SETCC || // setcc -> invertible.
|
|
||||||
(Cond.getOpcode() == ISD::XOR && // xor(X, C) -> invertible.
|
|
||||||
isa<ConstantSDNode>(Cond.getOperand(1))))) {
|
|
||||||
NeedsCondInvert = true;
|
|
||||||
std::swap(TrueC, FalseC);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Optimize C ? 8 : 0 -> zext(C) << 3. Likewise for any pow2/0.
|
// A power-of-2 multiply is just a shift. LEA also cheaply handles multiply by
|
||||||
if (FalseC->getAPIntValue() == 0 && TrueC->getAPIntValue().isPowerOf2()) {
|
// 3, 5, or 9 with i32/i64, so those get transformed too.
|
||||||
if (NeedsCondInvert) // Invert the condition if needed.
|
// TODO: For constants that overflow or do not differ by power-of-2 or small
|
||||||
Cond = DAG.getNode(ISD::XOR, DL, Cond.getValueType(), Cond,
|
// multiplier, convert to 'and' + 'add'.
|
||||||
DAG.getConstant(1, DL, Cond.getValueType()));
|
const APInt &TrueVal = TrueC->getAPIntValue();
|
||||||
|
const APInt &FalseVal = FalseC->getAPIntValue();
|
||||||
|
bool OV;
|
||||||
|
APInt Diff = TrueVal.ssub_ov(FalseVal, OV);
|
||||||
|
if (OV)
|
||||||
|
return SDValue();
|
||||||
|
|
||||||
// Zero extend the condition if needed.
|
APInt AbsDiff = Diff.abs();
|
||||||
Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, LHS.getValueType(), Cond);
|
if (AbsDiff.isPowerOf2() ||
|
||||||
|
((VT == MVT::i32 || VT == MVT::i64) &&
|
||||||
|
(AbsDiff == 3 || AbsDiff == 5 || AbsDiff == 9))) {
|
||||||
|
|
||||||
unsigned ShAmt = TrueC->getAPIntValue().logBase2();
|
// We need a positive multiplier constant for shift/LEA codegen. The 'not'
|
||||||
return DAG.getNode(ISD::SHL, DL, LHS.getValueType(), Cond,
|
// of the condition can usually be folded into a compare predicate, but even
|
||||||
DAG.getConstant(ShAmt, DL, MVT::i8));
|
// without that, the sequence should be cheaper than a CMOV alternative.
|
||||||
}
|
if (TrueVal.slt(FalseVal)) {
|
||||||
|
Cond = DAG.getNOT(DL, Cond, MVT::i1);
|
||||||
// Optimize cases that will turn into an LEA instruction. This requires
|
std::swap(TrueC, FalseC);
|
||||||
// an i32 or i64 and an efficient multiplier (1, 2, 3, 4, 5, 8, 9).
|
|
||||||
if (N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i64) {
|
|
||||||
uint64_t Diff = TrueC->getZExtValue() - FalseC->getZExtValue();
|
|
||||||
if (N->getValueType(0) == MVT::i32)
|
|
||||||
Diff = (unsigned)Diff;
|
|
||||||
|
|
||||||
bool IsFastMultiplier = false;
|
|
||||||
if (Diff < 10) {
|
|
||||||
switch ((unsigned char)Diff) {
|
|
||||||
default:
|
|
||||||
break;
|
|
||||||
case 1: // result = add base, cond
|
|
||||||
case 2: // result = lea base( , cond*2)
|
|
||||||
case 3: // result = lea base(cond, cond*2)
|
|
||||||
case 4: // result = lea base( , cond*4)
|
|
||||||
case 5: // result = lea base(cond, cond*4)
|
|
||||||
case 8: // result = lea base( , cond*8)
|
|
||||||
case 9: // result = lea base(cond, cond*8)
|
|
||||||
IsFastMultiplier = true;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (IsFastMultiplier) {
|
// select Cond, TC, FC --> (zext(Cond) * (TC - FC)) + FC
|
||||||
APInt Diff = TrueC->getAPIntValue() - FalseC->getAPIntValue();
|
SDValue R = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond);
|
||||||
if (NeedsCondInvert) // Invert the condition if needed.
|
|
||||||
Cond = DAG.getNode(ISD::XOR, DL, Cond.getValueType(), Cond,
|
|
||||||
DAG.getConstant(1, DL, Cond.getValueType()));
|
|
||||||
|
|
||||||
// Zero extend the condition if needed.
|
// Multiply condition by the difference if non-one.
|
||||||
Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, FalseC->getValueType(0), Cond);
|
if (!AbsDiff.isOneValue())
|
||||||
// Scale the condition by the difference.
|
R = DAG.getNode(ISD::MUL, DL, VT, R, DAG.getConstant(AbsDiff, DL, VT));
|
||||||
if (Diff != 1)
|
|
||||||
Cond = DAG.getNode(ISD::MUL, DL, Cond.getValueType(), Cond,
|
|
||||||
DAG.getConstant(Diff, DL, Cond.getValueType()));
|
|
||||||
|
|
||||||
// Add the base if non-zero.
|
// Add the base if non-zero.
|
||||||
if (FalseC->getAPIntValue() != 0)
|
if (!FalseC->isNullValue())
|
||||||
Cond = DAG.getNode(ISD::ADD, DL, Cond.getValueType(), Cond,
|
R = DAG.getNode(ISD::ADD, DL, VT, R, SDValue(FalseC, 0));
|
||||||
SDValue(FalseC, 0));
|
|
||||||
return Cond;
|
return R;
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return SDValue();
|
return SDValue();
|
||||||
|
|
|
@ -9,21 +9,19 @@
|
||||||
define i32 @main() nounwind {
|
define i32 @main() nounwind {
|
||||||
; CHECK-LABEL: main:
|
; CHECK-LABEL: main:
|
||||||
; CHECK: # BB#0: # %entry
|
; CHECK: # BB#0: # %entry
|
||||||
; CHECK-NEXT: cmpq $0, {{.*}}(%rip)
|
|
||||||
; CHECK-NEXT: movb $-106, %al
|
|
||||||
; CHECK-NEXT: jne .LBB0_2
|
|
||||||
; CHECK-NEXT: # BB#1: # %entry
|
|
||||||
; CHECK-NEXT: xorl %eax, %eax
|
; CHECK-NEXT: xorl %eax, %eax
|
||||||
; CHECK-NEXT: .LBB0_2: # %entry
|
; CHECK-NEXT: cmpq {{.*}}(%rip), %rax
|
||||||
|
; CHECK-NEXT: sbbl %eax, %eax
|
||||||
|
; CHECK-NEXT: andl $150, %eax
|
||||||
; CHECK-NEXT: testb %al, %al
|
; CHECK-NEXT: testb %al, %al
|
||||||
; CHECK-NEXT: jle .LBB0_3
|
; CHECK-NEXT: jle .LBB0_1
|
||||||
; CHECK-NEXT: # BB#4: # %if.then
|
; CHECK-NEXT: # BB#2: # %if.then
|
||||||
; CHECK-NEXT: movl $1, {{.*}}(%rip)
|
; CHECK-NEXT: movl $1, {{.*}}(%rip)
|
||||||
; CHECK-NEXT: movl $1, %esi
|
; CHECK-NEXT: movl $1, %esi
|
||||||
; CHECK-NEXT: jmp .LBB0_5
|
; CHECK-NEXT: jmp .LBB0_3
|
||||||
; CHECK-NEXT: .LBB0_3: # %entry.if.end_crit_edge
|
; CHECK-NEXT: .LBB0_1: # %entry.if.end_crit_edge
|
||||||
; CHECK-NEXT: movl {{.*}}(%rip), %esi
|
; CHECK-NEXT: movl {{.*}}(%rip), %esi
|
||||||
; CHECK-NEXT: .LBB0_5: # %if.end
|
; CHECK-NEXT: .LBB0_3: # %if.end
|
||||||
; CHECK-NEXT: pushq %rax
|
; CHECK-NEXT: pushq %rax
|
||||||
; CHECK-NEXT: movl $.L.str, %edi
|
; CHECK-NEXT: movl $.L.str, %edi
|
||||||
; CHECK-NEXT: xorl %eax, %eax
|
; CHECK-NEXT: xorl %eax, %eax
|
||||||
|
|
|
@ -125,12 +125,9 @@ define i32 @length3(i8* %X, i8* %Y) nounwind optsize {
|
||||||
; X86-NEXT: subl %ecx, %eax
|
; X86-NEXT: subl %ecx, %eax
|
||||||
; X86-NEXT: jmp .LBB4_3
|
; X86-NEXT: jmp .LBB4_3
|
||||||
; X86-NEXT: .LBB4_1: # %res_block
|
; X86-NEXT: .LBB4_1: # %res_block
|
||||||
; X86-NEXT: xorl %ecx, %ecx
|
; X86-NEXT: setae %al
|
||||||
; X86-NEXT: incl %ecx
|
; X86-NEXT: movzbl %al, %eax
|
||||||
; X86-NEXT: xorl %eax, %eax
|
; X86-NEXT: leal -1(%eax,%eax), %eax
|
||||||
; X86-NEXT: decl %eax
|
|
||||||
; X86-NEXT: cmpw %si, %dx
|
|
||||||
; X86-NEXT: cmovael %ecx, %eax
|
|
||||||
; X86-NEXT: .LBB4_3: # %endblock
|
; X86-NEXT: .LBB4_3: # %endblock
|
||||||
; X86-NEXT: popl %esi
|
; X86-NEXT: popl %esi
|
||||||
; X86-NEXT: retl
|
; X86-NEXT: retl
|
||||||
|
@ -149,9 +146,9 @@ define i32 @length3(i8* %X, i8* %Y) nounwind optsize {
|
||||||
; X64-NEXT: subl %ecx, %eax
|
; X64-NEXT: subl %ecx, %eax
|
||||||
; X64-NEXT: retq
|
; X64-NEXT: retq
|
||||||
; X64-NEXT: .LBB4_1: # %res_block
|
; X64-NEXT: .LBB4_1: # %res_block
|
||||||
; X64-NEXT: movl $-1, %ecx
|
; X64-NEXT: setae %al
|
||||||
; X64-NEXT: movl $1, %eax
|
; X64-NEXT: movzbl %al, %eax
|
||||||
; X64-NEXT: cmovbl %ecx, %eax
|
; X64-NEXT: leal -1(%rax,%rax), %eax
|
||||||
; X64-NEXT: retq
|
; X64-NEXT: retq
|
||||||
%m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 3) nounwind
|
%m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 3) nounwind
|
||||||
ret i32 %m
|
ret i32 %m
|
||||||
|
@ -286,12 +283,9 @@ define i32 @length5(i8* %X, i8* %Y) nounwind optsize {
|
||||||
; X86-NEXT: subl %ecx, %eax
|
; X86-NEXT: subl %ecx, %eax
|
||||||
; X86-NEXT: jmp .LBB9_3
|
; X86-NEXT: jmp .LBB9_3
|
||||||
; X86-NEXT: .LBB9_1: # %res_block
|
; X86-NEXT: .LBB9_1: # %res_block
|
||||||
; X86-NEXT: xorl %ecx, %ecx
|
; X86-NEXT: setae %al
|
||||||
; X86-NEXT: incl %ecx
|
; X86-NEXT: movzbl %al, %eax
|
||||||
; X86-NEXT: xorl %eax, %eax
|
; X86-NEXT: leal -1(%eax,%eax), %eax
|
||||||
; X86-NEXT: decl %eax
|
|
||||||
; X86-NEXT: cmpl %esi, %edx
|
|
||||||
; X86-NEXT: cmovael %ecx, %eax
|
|
||||||
; X86-NEXT: .LBB9_3: # %endblock
|
; X86-NEXT: .LBB9_3: # %endblock
|
||||||
; X86-NEXT: popl %esi
|
; X86-NEXT: popl %esi
|
||||||
; X86-NEXT: retl
|
; X86-NEXT: retl
|
||||||
|
@ -310,9 +304,9 @@ define i32 @length5(i8* %X, i8* %Y) nounwind optsize {
|
||||||
; X64-NEXT: subl %ecx, %eax
|
; X64-NEXT: subl %ecx, %eax
|
||||||
; X64-NEXT: retq
|
; X64-NEXT: retq
|
||||||
; X64-NEXT: .LBB9_1: # %res_block
|
; X64-NEXT: .LBB9_1: # %res_block
|
||||||
; X64-NEXT: movl $-1, %ecx
|
; X64-NEXT: setae %al
|
||||||
; X64-NEXT: movl $1, %eax
|
; X64-NEXT: movzbl %al, %eax
|
||||||
; X64-NEXT: cmovbl %ecx, %eax
|
; X64-NEXT: leal -1(%rax,%rax), %eax
|
||||||
; X64-NEXT: retq
|
; X64-NEXT: retq
|
||||||
%m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 5) nounwind
|
%m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 5) nounwind
|
||||||
ret i32 %m
|
ret i32 %m
|
||||||
|
@ -381,12 +375,10 @@ define i32 @length8(i8* %X, i8* %Y) nounwind optsize {
|
||||||
; X86-NEXT: cmpl %edx, %ecx
|
; X86-NEXT: cmpl %edx, %ecx
|
||||||
; X86-NEXT: je .LBB11_3
|
; X86-NEXT: je .LBB11_3
|
||||||
; X86-NEXT: .LBB11_1: # %res_block
|
; X86-NEXT: .LBB11_1: # %res_block
|
||||||
; X86-NEXT: xorl %esi, %esi
|
|
||||||
; X86-NEXT: incl %esi
|
|
||||||
; X86-NEXT: xorl %eax, %eax
|
; X86-NEXT: xorl %eax, %eax
|
||||||
; X86-NEXT: decl %eax
|
|
||||||
; X86-NEXT: cmpl %edx, %ecx
|
; X86-NEXT: cmpl %edx, %ecx
|
||||||
; X86-NEXT: cmovael %esi, %eax
|
; X86-NEXT: setae %al
|
||||||
|
; X86-NEXT: leal -1(%eax,%eax), %eax
|
||||||
; X86-NEXT: .LBB11_3: # %endblock
|
; X86-NEXT: .LBB11_3: # %endblock
|
||||||
; X86-NEXT: popl %esi
|
; X86-NEXT: popl %esi
|
||||||
; X86-NEXT: retl
|
; X86-NEXT: retl
|
||||||
|
@ -531,10 +523,10 @@ define i32 @length12(i8* %X, i8* %Y) nounwind optsize {
|
||||||
; X64-NEXT: # BB#3: # %endblock
|
; X64-NEXT: # BB#3: # %endblock
|
||||||
; X64-NEXT: retq
|
; X64-NEXT: retq
|
||||||
; X64-NEXT: .LBB15_1: # %res_block
|
; X64-NEXT: .LBB15_1: # %res_block
|
||||||
|
; X64-NEXT: xorl %eax, %eax
|
||||||
; X64-NEXT: cmpq %rdx, %rcx
|
; X64-NEXT: cmpq %rdx, %rcx
|
||||||
; X64-NEXT: movl $-1, %ecx
|
; X64-NEXT: setae %al
|
||||||
; X64-NEXT: movl $1, %eax
|
; X64-NEXT: leal -1(%rax,%rax), %eax
|
||||||
; X64-NEXT: cmovbl %ecx, %eax
|
|
||||||
; X64-NEXT: retq
|
; X64-NEXT: retq
|
||||||
%m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 12) nounwind
|
%m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 12) nounwind
|
||||||
ret i32 %m
|
ret i32 %m
|
||||||
|
@ -572,10 +564,10 @@ define i32 @length16(i8* %X, i8* %Y) nounwind optsize {
|
||||||
; X64-NEXT: # BB#3: # %endblock
|
; X64-NEXT: # BB#3: # %endblock
|
||||||
; X64-NEXT: retq
|
; X64-NEXT: retq
|
||||||
; X64-NEXT: .LBB16_1: # %res_block
|
; X64-NEXT: .LBB16_1: # %res_block
|
||||||
|
; X64-NEXT: xorl %eax, %eax
|
||||||
; X64-NEXT: cmpq %rdx, %rcx
|
; X64-NEXT: cmpq %rdx, %rcx
|
||||||
; X64-NEXT: movl $-1, %ecx
|
; X64-NEXT: setae %al
|
||||||
; X64-NEXT: movl $1, %eax
|
; X64-NEXT: leal -1(%rax,%rax), %eax
|
||||||
; X64-NEXT: cmovbl %ecx, %eax
|
|
||||||
; X64-NEXT: retq
|
; X64-NEXT: retq
|
||||||
%m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 16) nounwind
|
%m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 16) nounwind
|
||||||
ret i32 %m
|
ret i32 %m
|
||||||
|
|
|
@ -126,9 +126,9 @@ define i32 @length3(i8* %X, i8* %Y) nounwind {
|
||||||
; X86-NEXT: popl %esi
|
; X86-NEXT: popl %esi
|
||||||
; X86-NEXT: retl
|
; X86-NEXT: retl
|
||||||
; X86-NEXT: .LBB4_1: # %res_block
|
; X86-NEXT: .LBB4_1: # %res_block
|
||||||
; X86-NEXT: movl $-1, %ecx
|
; X86-NEXT: setae %al
|
||||||
; X86-NEXT: movl $1, %eax
|
; X86-NEXT: movzbl %al, %eax
|
||||||
; X86-NEXT: cmovbl %ecx, %eax
|
; X86-NEXT: leal -1(%eax,%eax), %eax
|
||||||
; X86-NEXT: popl %esi
|
; X86-NEXT: popl %esi
|
||||||
; X86-NEXT: retl
|
; X86-NEXT: retl
|
||||||
;
|
;
|
||||||
|
@ -146,9 +146,9 @@ define i32 @length3(i8* %X, i8* %Y) nounwind {
|
||||||
; X64-NEXT: subl %ecx, %eax
|
; X64-NEXT: subl %ecx, %eax
|
||||||
; X64-NEXT: retq
|
; X64-NEXT: retq
|
||||||
; X64-NEXT: .LBB4_1: # %res_block
|
; X64-NEXT: .LBB4_1: # %res_block
|
||||||
; X64-NEXT: movl $-1, %ecx
|
; X64-NEXT: setae %al
|
||||||
; X64-NEXT: movl $1, %eax
|
; X64-NEXT: movzbl %al, %eax
|
||||||
; X64-NEXT: cmovbl %ecx, %eax
|
; X64-NEXT: leal -1(%rax,%rax), %eax
|
||||||
; X64-NEXT: retq
|
; X64-NEXT: retq
|
||||||
%m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 3) nounwind
|
%m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 3) nounwind
|
||||||
ret i32 %m
|
ret i32 %m
|
||||||
|
@ -283,9 +283,9 @@ define i32 @length5(i8* %X, i8* %Y) nounwind {
|
||||||
; X86-NEXT: popl %esi
|
; X86-NEXT: popl %esi
|
||||||
; X86-NEXT: retl
|
; X86-NEXT: retl
|
||||||
; X86-NEXT: .LBB9_1: # %res_block
|
; X86-NEXT: .LBB9_1: # %res_block
|
||||||
; X86-NEXT: movl $-1, %ecx
|
; X86-NEXT: setae %al
|
||||||
; X86-NEXT: movl $1, %eax
|
; X86-NEXT: movzbl %al, %eax
|
||||||
; X86-NEXT: cmovbl %ecx, %eax
|
; X86-NEXT: leal -1(%eax,%eax), %eax
|
||||||
; X86-NEXT: popl %esi
|
; X86-NEXT: popl %esi
|
||||||
; X86-NEXT: retl
|
; X86-NEXT: retl
|
||||||
;
|
;
|
||||||
|
@ -303,9 +303,9 @@ define i32 @length5(i8* %X, i8* %Y) nounwind {
|
||||||
; X64-NEXT: subl %ecx, %eax
|
; X64-NEXT: subl %ecx, %eax
|
||||||
; X64-NEXT: retq
|
; X64-NEXT: retq
|
||||||
; X64-NEXT: .LBB9_1: # %res_block
|
; X64-NEXT: .LBB9_1: # %res_block
|
||||||
; X64-NEXT: movl $-1, %ecx
|
; X64-NEXT: setae %al
|
||||||
; X64-NEXT: movl $1, %eax
|
; X64-NEXT: movzbl %al, %eax
|
||||||
; X64-NEXT: cmovbl %ecx, %eax
|
; X64-NEXT: leal -1(%rax,%rax), %eax
|
||||||
; X64-NEXT: retq
|
; X64-NEXT: retq
|
||||||
%m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 5) nounwind
|
%m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 5) nounwind
|
||||||
ret i32 %m
|
ret i32 %m
|
||||||
|
@ -376,10 +376,10 @@ define i32 @length8(i8* %X, i8* %Y) nounwind {
|
||||||
; X86-NEXT: popl %esi
|
; X86-NEXT: popl %esi
|
||||||
; X86-NEXT: retl
|
; X86-NEXT: retl
|
||||||
; X86-NEXT: .LBB11_1: # %res_block
|
; X86-NEXT: .LBB11_1: # %res_block
|
||||||
|
; X86-NEXT: xorl %eax, %eax
|
||||||
; X86-NEXT: cmpl %edx, %ecx
|
; X86-NEXT: cmpl %edx, %ecx
|
||||||
; X86-NEXT: movl $-1, %ecx
|
; X86-NEXT: setae %al
|
||||||
; X86-NEXT: movl $1, %eax
|
; X86-NEXT: leal -1(%eax,%eax), %eax
|
||||||
; X86-NEXT: cmovbl %ecx, %eax
|
|
||||||
; X86-NEXT: popl %esi
|
; X86-NEXT: popl %esi
|
||||||
; X86-NEXT: retl
|
; X86-NEXT: retl
|
||||||
;
|
;
|
||||||
|
@ -521,10 +521,10 @@ define i32 @length12(i8* %X, i8* %Y) nounwind {
|
||||||
; X64-NEXT: # BB#3: # %endblock
|
; X64-NEXT: # BB#3: # %endblock
|
||||||
; X64-NEXT: retq
|
; X64-NEXT: retq
|
||||||
; X64-NEXT: .LBB15_1: # %res_block
|
; X64-NEXT: .LBB15_1: # %res_block
|
||||||
|
; X64-NEXT: xorl %eax, %eax
|
||||||
; X64-NEXT: cmpq %rdx, %rcx
|
; X64-NEXT: cmpq %rdx, %rcx
|
||||||
; X64-NEXT: movl $-1, %ecx
|
; X64-NEXT: setae %al
|
||||||
; X64-NEXT: movl $1, %eax
|
; X64-NEXT: leal -1(%rax,%rax), %eax
|
||||||
; X64-NEXT: cmovbl %ecx, %eax
|
|
||||||
; X64-NEXT: retq
|
; X64-NEXT: retq
|
||||||
%m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 12) nounwind
|
%m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 12) nounwind
|
||||||
ret i32 %m
|
ret i32 %m
|
||||||
|
@ -562,10 +562,10 @@ define i32 @length16(i8* %X, i8* %Y) nounwind {
|
||||||
; X64-NEXT: # BB#3: # %endblock
|
; X64-NEXT: # BB#3: # %endblock
|
||||||
; X64-NEXT: retq
|
; X64-NEXT: retq
|
||||||
; X64-NEXT: .LBB16_1: # %res_block
|
; X64-NEXT: .LBB16_1: # %res_block
|
||||||
|
; X64-NEXT: xorl %eax, %eax
|
||||||
; X64-NEXT: cmpq %rdx, %rcx
|
; X64-NEXT: cmpq %rdx, %rcx
|
||||||
; X64-NEXT: movl $-1, %ecx
|
; X64-NEXT: setae %al
|
||||||
; X64-NEXT: movl $1, %eax
|
; X64-NEXT: leal -1(%rax,%rax), %eax
|
||||||
; X64-NEXT: cmovbl %ecx, %eax
|
|
||||||
; X64-NEXT: retq
|
; X64-NEXT: retq
|
||||||
%m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 16) nounwind
|
%m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 16) nounwind
|
||||||
ret i32 %m
|
ret i32 %m
|
||||||
|
|
|
@ -16,11 +16,9 @@ define i32 @foo (i64* %so) nounwind uwtable ssp {
|
||||||
; CHECK-NEXT: cmpl 16(%eax), %edx
|
; CHECK-NEXT: cmpl 16(%eax), %edx
|
||||||
; CHECK-NEXT: movl $0, 16(%eax)
|
; CHECK-NEXT: movl $0, 16(%eax)
|
||||||
; CHECK-NEXT: sbbl %ecx, %edx
|
; CHECK-NEXT: sbbl %ecx, %edx
|
||||||
; CHECK-NEXT: movl $-1, %eax
|
; CHECK-NEXT: setl %al
|
||||||
; CHECK-NEXT: jl .LBB0_2
|
; CHECK-NEXT: movzbl %al, %eax
|
||||||
; CHECK-NEXT: # BB#1:
|
; CHECK-NEXT: negl %eax
|
||||||
; CHECK-NEXT: xorl %eax, %eax
|
|
||||||
; CHECK-NEXT: .LBB0_2:
|
|
||||||
; CHECK-NEXT: retl
|
; CHECK-NEXT: retl
|
||||||
%used = getelementptr inbounds i64, i64* %so, i32 3
|
%used = getelementptr inbounds i64, i64* %so, i32 3
|
||||||
store i64 0, i64* %used, align 8
|
store i64 0, i64* %used, align 8
|
||||||
|
|
|
@ -952,12 +952,11 @@ define i32 @foo() local_unnamed_addr #0 {
|
||||||
; X86-NEXT: .Lcfi103:
|
; X86-NEXT: .Lcfi103:
|
||||||
; X86-NEXT: .cfi_adjust_cfa_offset -8
|
; X86-NEXT: .cfi_adjust_cfa_offset -8
|
||||||
; X86-NEXT: xorl $32, %eax
|
; X86-NEXT: xorl $32, %eax
|
||||||
|
; X86-NEXT: xorl %ecx, %ecx
|
||||||
; X86-NEXT: orl %ebx, %eax
|
; X86-NEXT: orl %ebx, %eax
|
||||||
; X86-NEXT: movl $-1, %eax
|
; X86-NEXT: setne %cl
|
||||||
; X86-NEXT: jne .LBB1_2
|
; X86-NEXT: negl %ecx
|
||||||
; X86-NEXT: # BB#1:
|
; X86-NEXT: movl %ecx, %eax
|
||||||
; X86-NEXT: xorl %eax, %eax
|
|
||||||
; X86-NEXT: .LBB1_2:
|
|
||||||
; X86-NEXT: popl %esi
|
; X86-NEXT: popl %esi
|
||||||
; X86-NEXT: popl %edi
|
; X86-NEXT: popl %edi
|
||||||
; X86-NEXT: popl %ebx
|
; X86-NEXT: popl %ebx
|
||||||
|
@ -974,23 +973,20 @@ define i32 @foo() local_unnamed_addr #0 {
|
||||||
; X64-HSW-NEXT: pushq %r14
|
; X64-HSW-NEXT: pushq %r14
|
||||||
; X64-HSW-NEXT: .Lcfi2:
|
; X64-HSW-NEXT: .Lcfi2:
|
||||||
; X64-HSW-NEXT: .cfi_def_cfa_offset 32
|
; X64-HSW-NEXT: .cfi_def_cfa_offset 32
|
||||||
; X64-HSW-NEXT: pushq %r12
|
; X64-HSW-NEXT: pushq %rbx
|
||||||
; X64-HSW-NEXT: .Lcfi3:
|
; X64-HSW-NEXT: .Lcfi3:
|
||||||
; X64-HSW-NEXT: .cfi_def_cfa_offset 40
|
; X64-HSW-NEXT: .cfi_def_cfa_offset 40
|
||||||
; X64-HSW-NEXT: pushq %rbx
|
; X64-HSW-NEXT: pushq %rax
|
||||||
; X64-HSW-NEXT: .Lcfi4:
|
; X64-HSW-NEXT: .Lcfi4:
|
||||||
; X64-HSW-NEXT: .cfi_def_cfa_offset 48
|
; X64-HSW-NEXT: .cfi_def_cfa_offset 48
|
||||||
; X64-HSW-NEXT: .Lcfi5:
|
; X64-HSW-NEXT: .Lcfi5:
|
||||||
; X64-HSW-NEXT: .cfi_offset %rbx, -48
|
; X64-HSW-NEXT: .cfi_offset %rbx, -40
|
||||||
; X64-HSW-NEXT: .Lcfi6:
|
; X64-HSW-NEXT: .Lcfi6:
|
||||||
; X64-HSW-NEXT: .cfi_offset %r12, -40
|
|
||||||
; X64-HSW-NEXT: .Lcfi7:
|
|
||||||
; X64-HSW-NEXT: .cfi_offset %r14, -32
|
; X64-HSW-NEXT: .cfi_offset %r14, -32
|
||||||
; X64-HSW-NEXT: .Lcfi8:
|
; X64-HSW-NEXT: .Lcfi7:
|
||||||
; X64-HSW-NEXT: .cfi_offset %r15, -24
|
; X64-HSW-NEXT: .cfi_offset %r15, -24
|
||||||
; X64-HSW-NEXT: .Lcfi9:
|
; X64-HSW-NEXT: .Lcfi8:
|
||||||
; X64-HSW-NEXT: .cfi_offset %rbp, -16
|
; X64-HSW-NEXT: .cfi_offset %rbp, -16
|
||||||
; X64-HSW-NEXT: xorl %r12d, %r12d
|
|
||||||
; X64-HSW-NEXT: movl $1, %edi
|
; X64-HSW-NEXT: movl $1, %edi
|
||||||
; X64-HSW-NEXT: xorl %esi, %esi
|
; X64-HSW-NEXT: xorl %esi, %esi
|
||||||
; X64-HSW-NEXT: callq mult
|
; X64-HSW-NEXT: callq mult
|
||||||
|
@ -1180,11 +1176,13 @@ define i32 @foo() local_unnamed_addr #0 {
|
||||||
; X64-HSW-NEXT: movl $16, %esi
|
; X64-HSW-NEXT: movl $16, %esi
|
||||||
; X64-HSW-NEXT: callq mult
|
; X64-HSW-NEXT: callq mult
|
||||||
; X64-HSW-NEXT: xorl $32, %eax
|
; X64-HSW-NEXT: xorl $32, %eax
|
||||||
|
; X64-HSW-NEXT: xorl %ecx, %ecx
|
||||||
; X64-HSW-NEXT: orl %ebx, %eax
|
; X64-HSW-NEXT: orl %ebx, %eax
|
||||||
; X64-HSW-NEXT: movl $-1, %eax
|
; X64-HSW-NEXT: setne %cl
|
||||||
; X64-HSW-NEXT: cmovel %r12d, %eax
|
; X64-HSW-NEXT: negl %ecx
|
||||||
|
; X64-HSW-NEXT: movl %ecx, %eax
|
||||||
|
; X64-HSW-NEXT: addq $8, %rsp
|
||||||
; X64-HSW-NEXT: popq %rbx
|
; X64-HSW-NEXT: popq %rbx
|
||||||
; X64-HSW-NEXT: popq %r12
|
|
||||||
; X64-HSW-NEXT: popq %r14
|
; X64-HSW-NEXT: popq %r14
|
||||||
; X64-HSW-NEXT: popq %r15
|
; X64-HSW-NEXT: popq %r15
|
||||||
; X64-HSW-NEXT: popq %rbp
|
; X64-HSW-NEXT: popq %rbp
|
||||||
|
|
|
@ -5,30 +5,28 @@
|
||||||
define i32 @fn() {
|
define i32 @fn() {
|
||||||
; X86-LABEL: fn:
|
; X86-LABEL: fn:
|
||||||
; X86: # BB#0: # %entry
|
; X86: # BB#0: # %entry
|
||||||
; X86-NEXT: cmpl $1, %eax
|
|
||||||
; X86-NEXT: sete %cl
|
|
||||||
; X86-NEXT: movl $-1, %eax
|
|
||||||
; X86-NEXT: jne .LBB0_2
|
|
||||||
; X86-NEXT: # BB#1: # %entry
|
|
||||||
; X86-NEXT: xorl %eax, %eax
|
; X86-NEXT: xorl %eax, %eax
|
||||||
; X86-NEXT: .LBB0_2: # %entry
|
; X86-NEXT: cmpl $1, %eax
|
||||||
|
; X86-NEXT: setne %al
|
||||||
|
; X86-NEXT: sete %cl
|
||||||
|
; X86-NEXT: negl %eax
|
||||||
; X86-NEXT: addb %cl, %cl
|
; X86-NEXT: addb %cl, %cl
|
||||||
; X86-NEXT: shll %cl, %eax
|
; X86-NEXT: shll %cl, %eax
|
||||||
; X86-NEXT: .p2align 4, 0x90
|
; X86-NEXT: .p2align 4, 0x90
|
||||||
; X86-NEXT: .LBB0_3: # %bb1
|
; X86-NEXT: .LBB0_1: # %bb1
|
||||||
; X86-NEXT: # =>This Inner Loop Header: Depth=1
|
; X86-NEXT: # =>This Inner Loop Header: Depth=1
|
||||||
; X86-NEXT: testl %eax, %eax
|
; X86-NEXT: testl %eax, %eax
|
||||||
; X86-NEXT: je .LBB0_3
|
; X86-NEXT: je .LBB0_1
|
||||||
; X86-NEXT: # BB#4: # %bb2
|
; X86-NEXT: # BB#2: # %bb2
|
||||||
; X86-NEXT: retl
|
; X86-NEXT: retl
|
||||||
;
|
;
|
||||||
; X64-LABEL: fn:
|
; X64-LABEL: fn:
|
||||||
; X64: # BB#0: # %entry
|
; X64: # BB#0: # %entry
|
||||||
; X64-NEXT: xorl %edx, %edx
|
; X64-NEXT: xorl %eax, %eax
|
||||||
; X64-NEXT: cmpl $1, %eax
|
; X64-NEXT: cmpl $1, %eax
|
||||||
|
; X64-NEXT: setne %al
|
||||||
; X64-NEXT: sete %cl
|
; X64-NEXT: sete %cl
|
||||||
; X64-NEXT: movl $-1, %eax
|
; X64-NEXT: negl %eax
|
||||||
; X64-NEXT: cmovel %edx, %eax
|
|
||||||
; X64-NEXT: addb %cl, %cl
|
; X64-NEXT: addb %cl, %cl
|
||||||
; X64-NEXT: shll %cl, %eax
|
; X64-NEXT: shll %cl, %eax
|
||||||
; X64-NEXT: .p2align 4, 0x90
|
; X64-NEXT: .p2align 4, 0x90
|
||||||
|
|
|
@ -130,10 +130,8 @@ define i32 @ult_select_neg1_or_0(i32 %x, i32 %y) nounwind {
|
||||||
define i32 @ugt_select_neg1_or_0(i32 %x, i32 %y) nounwind {
|
define i32 @ugt_select_neg1_or_0(i32 %x, i32 %y) nounwind {
|
||||||
; CHECK-LABEL: ugt_select_neg1_or_0:
|
; CHECK-LABEL: ugt_select_neg1_or_0:
|
||||||
; CHECK: # BB#0:
|
; CHECK: # BB#0:
|
||||||
; CHECK-NEXT: xorl %ecx, %ecx
|
; CHECK-NEXT: cmpl %esi, %edi
|
||||||
; CHECK-NEXT: cmpl %edi, %esi
|
; CHECK-NEXT: sbbl %eax, %eax
|
||||||
; CHECK-NEXT: movl $-1, %eax
|
|
||||||
; CHECK-NEXT: cmovbel %ecx, %eax
|
|
||||||
; CHECK-NEXT: retq
|
; CHECK-NEXT: retq
|
||||||
%cmp = icmp ugt i32 %y, %x
|
%cmp = icmp ugt i32 %y, %x
|
||||||
%ext = sext i1 %cmp to i32
|
%ext = sext i1 %cmp to i32
|
||||||
|
|
|
@ -545,12 +545,11 @@ define i64 @test9b(i64 %x, i64 %y) nounwind readnone ssp noredzone {
|
||||||
;
|
;
|
||||||
; MCU-LABEL: test9b:
|
; MCU-LABEL: test9b:
|
||||||
; MCU: # BB#0:
|
; MCU: # BB#0:
|
||||||
; MCU-NEXT: orl %edx, %eax
|
; MCU-NEXT: movl %edx, %ecx
|
||||||
; MCU-NEXT: movl $-1, %edx
|
|
||||||
; MCU-NEXT: je .LBB10_2
|
|
||||||
; MCU-NEXT: # BB#1:
|
|
||||||
; MCU-NEXT: xorl %edx, %edx
|
; MCU-NEXT: xorl %edx, %edx
|
||||||
; MCU-NEXT: .LBB10_2:
|
; MCU-NEXT: orl %ecx, %eax
|
||||||
|
; MCU-NEXT: sete %dl
|
||||||
|
; MCU-NEXT: negl %edx
|
||||||
; MCU-NEXT: movl {{[0-9]+}}(%esp), %eax
|
; MCU-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||||
; MCU-NEXT: orl %edx, %eax
|
; MCU-NEXT: orl %edx, %eax
|
||||||
; MCU-NEXT: orl {{[0-9]+}}(%esp), %edx
|
; MCU-NEXT: orl {{[0-9]+}}(%esp), %edx
|
||||||
|
@ -563,23 +562,14 @@ define i64 @test9b(i64 %x, i64 %y) nounwind readnone ssp noredzone {
|
||||||
|
|
||||||
;; Select between -1 and 1.
|
;; Select between -1 and 1.
|
||||||
define i64 @test10(i64 %x, i64 %y) nounwind readnone ssp noredzone {
|
define i64 @test10(i64 %x, i64 %y) nounwind readnone ssp noredzone {
|
||||||
; GENERIC-LABEL: test10:
|
; CHECK-LABEL: test10:
|
||||||
; GENERIC: ## BB#0:
|
; CHECK: ## BB#0:
|
||||||
; GENERIC-NEXT: cmpq $1, %rdi
|
; CHECK-NEXT: xorl %eax, %eax
|
||||||
; GENERIC-NEXT: sbbq %rax, %rax
|
; CHECK-NEXT: testq %rdi, %rdi
|
||||||
; GENERIC-NEXT: orq $1, %rax
|
; CHECK-NEXT: setne %al
|
||||||
; GENERIC-NEXT: retq
|
; CHECK-NEXT: leaq -1(%rax,%rax), %rax
|
||||||
; GENERIC-NEXT: ## -- End function
|
; CHECK-NEXT: retq
|
||||||
;
|
; CHECK-NEXT: ## -- End function
|
||||||
; ATOM-LABEL: test10:
|
|
||||||
; ATOM: ## BB#0:
|
|
||||||
; ATOM-NEXT: cmpq $1, %rdi
|
|
||||||
; ATOM-NEXT: sbbq %rax, %rax
|
|
||||||
; ATOM-NEXT: orq $1, %rax
|
|
||||||
; ATOM-NEXT: nop
|
|
||||||
; ATOM-NEXT: nop
|
|
||||||
; ATOM-NEXT: retq
|
|
||||||
; ATOM-NEXT: ## -- End function
|
|
||||||
;
|
;
|
||||||
; MCU-LABEL: test10:
|
; MCU-LABEL: test10:
|
||||||
; MCU: # BB#0:
|
; MCU: # BB#0:
|
||||||
|
@ -747,29 +737,22 @@ define i32 @test13(i32 %a, i32 %b) nounwind {
|
||||||
}
|
}
|
||||||
|
|
||||||
define i32 @test14(i32 %a, i32 %b) nounwind {
|
define i32 @test14(i32 %a, i32 %b) nounwind {
|
||||||
; GENERIC-LABEL: test14:
|
; CHECK-LABEL: test14:
|
||||||
; GENERIC: ## BB#0:
|
; CHECK: ## BB#0:
|
||||||
; GENERIC-NEXT: cmpl %esi, %edi
|
; CHECK-NEXT: xorl %eax, %eax
|
||||||
; GENERIC-NEXT: sbbl %eax, %eax
|
; CHECK-NEXT: cmpl %esi, %edi
|
||||||
; GENERIC-NEXT: notl %eax
|
; CHECK-NEXT: setae %al
|
||||||
; GENERIC-NEXT: retq
|
; CHECK-NEXT: negl %eax
|
||||||
; GENERIC-NEXT: ## -- End function
|
; CHECK-NEXT: retq
|
||||||
;
|
; CHECK-NEXT: ## -- End function
|
||||||
; ATOM-LABEL: test14:
|
|
||||||
; ATOM: ## BB#0:
|
|
||||||
; ATOM-NEXT: cmpl %esi, %edi
|
|
||||||
; ATOM-NEXT: sbbl %eax, %eax
|
|
||||||
; ATOM-NEXT: notl %eax
|
|
||||||
; ATOM-NEXT: nop
|
|
||||||
; ATOM-NEXT: nop
|
|
||||||
; ATOM-NEXT: retq
|
|
||||||
; ATOM-NEXT: ## -- End function
|
|
||||||
;
|
;
|
||||||
; MCU-LABEL: test14:
|
; MCU-LABEL: test14:
|
||||||
; MCU: # BB#0:
|
; MCU: # BB#0:
|
||||||
|
; MCU-NEXT: xorl %ecx, %ecx
|
||||||
; MCU-NEXT: cmpl %edx, %eax
|
; MCU-NEXT: cmpl %edx, %eax
|
||||||
; MCU-NEXT: sbbl %eax, %eax
|
; MCU-NEXT: setae %cl
|
||||||
; MCU-NEXT: notl %eax
|
; MCU-NEXT: negl %ecx
|
||||||
|
; MCU-NEXT: movl %ecx, %eax
|
||||||
; MCU-NEXT: retl
|
; MCU-NEXT: retl
|
||||||
%c = icmp uge i32 %a, %b
|
%c = icmp uge i32 %a, %b
|
||||||
%d = sext i1 %c to i32
|
%d = sext i1 %c to i32
|
||||||
|
@ -826,12 +809,11 @@ define i64 @test16(i64 %x) nounwind uwtable readnone ssp {
|
||||||
;
|
;
|
||||||
; MCU-LABEL: test16:
|
; MCU-LABEL: test16:
|
||||||
; MCU: # BB#0: # %entry
|
; MCU: # BB#0: # %entry
|
||||||
; MCU-NEXT: orl %edx, %eax
|
; MCU-NEXT: movl %eax, %ecx
|
||||||
; MCU-NEXT: movl $-1, %eax
|
|
||||||
; MCU-NEXT: jne .LBB18_2
|
|
||||||
; MCU-NEXT: # BB#1: # %entry
|
|
||||||
; MCU-NEXT: xorl %eax, %eax
|
; MCU-NEXT: xorl %eax, %eax
|
||||||
; MCU-NEXT: .LBB18_2: # %entry
|
; MCU-NEXT: orl %edx, %ecx
|
||||||
|
; MCU-NEXT: setne %al
|
||||||
|
; MCU-NEXT: negl %eax
|
||||||
; MCU-NEXT: movl %eax, %edx
|
; MCU-NEXT: movl %eax, %edx
|
||||||
; MCU-NEXT: retl
|
; MCU-NEXT: retl
|
||||||
entry:
|
entry:
|
||||||
|
@ -844,14 +826,16 @@ define i16 @test17(i16 %x) nounwind {
|
||||||
; GENERIC-LABEL: test17:
|
; GENERIC-LABEL: test17:
|
||||||
; GENERIC: ## BB#0: ## %entry
|
; GENERIC: ## BB#0: ## %entry
|
||||||
; GENERIC-NEXT: negw %di
|
; GENERIC-NEXT: negw %di
|
||||||
; GENERIC-NEXT: sbbw %ax, %ax
|
; GENERIC-NEXT: sbbl %eax, %eax
|
||||||
|
; GENERIC-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
|
||||||
; GENERIC-NEXT: retq
|
; GENERIC-NEXT: retq
|
||||||
; GENERIC-NEXT: ## -- End function
|
; GENERIC-NEXT: ## -- End function
|
||||||
;
|
;
|
||||||
; ATOM-LABEL: test17:
|
; ATOM-LABEL: test17:
|
||||||
; ATOM: ## BB#0: ## %entry
|
; ATOM: ## BB#0: ## %entry
|
||||||
; ATOM-NEXT: negw %di
|
; ATOM-NEXT: negw %di
|
||||||
; ATOM-NEXT: sbbw %ax, %ax
|
; ATOM-NEXT: sbbl %eax, %eax
|
||||||
|
; ATOM-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
|
||||||
; ATOM-NEXT: nop
|
; ATOM-NEXT: nop
|
||||||
; ATOM-NEXT: nop
|
; ATOM-NEXT: nop
|
||||||
; ATOM-NEXT: nop
|
; ATOM-NEXT: nop
|
||||||
|
@ -862,7 +846,8 @@ define i16 @test17(i16 %x) nounwind {
|
||||||
; MCU-LABEL: test17:
|
; MCU-LABEL: test17:
|
||||||
; MCU: # BB#0: # %entry
|
; MCU: # BB#0: # %entry
|
||||||
; MCU-NEXT: negw %ax
|
; MCU-NEXT: negw %ax
|
||||||
; MCU-NEXT: sbbw %ax, %ax
|
; MCU-NEXT: sbbl %eax, %eax
|
||||||
|
; MCU-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
|
||||||
; MCU-NEXT: retl
|
; MCU-NEXT: retl
|
||||||
entry:
|
entry:
|
||||||
%cmp = icmp ne i16 %x, 0
|
%cmp = icmp ne i16 %x, 0
|
||||||
|
|
|
@ -211,10 +211,9 @@ define i32 @select_C_Cplus1_signext(i1 signext %cond) {
|
||||||
define i32 @select_lea_2(i1 zeroext %cond) {
|
define i32 @select_lea_2(i1 zeroext %cond) {
|
||||||
; CHECK-LABEL: select_lea_2:
|
; CHECK-LABEL: select_lea_2:
|
||||||
; CHECK: # BB#0:
|
; CHECK: # BB#0:
|
||||||
; CHECK-NEXT: testb %dil, %dil
|
; CHECK-NEXT: xorb $1, %dil
|
||||||
; CHECK-NEXT: movl $-1, %ecx
|
; CHECK-NEXT: movzbl %dil, %eax
|
||||||
; CHECK-NEXT: movl $1, %eax
|
; CHECK-NEXT: leal -1(%rax,%rax), %eax
|
||||||
; CHECK-NEXT: cmovnel %ecx, %eax
|
|
||||||
; CHECK-NEXT: retq
|
; CHECK-NEXT: retq
|
||||||
%sel = select i1 %cond, i32 -1, i32 1
|
%sel = select i1 %cond, i32 -1, i32 1
|
||||||
ret i32 %sel
|
ret i32 %sel
|
||||||
|
@ -223,10 +222,9 @@ define i32 @select_lea_2(i1 zeroext %cond) {
|
||||||
define i64 @select_lea_3(i1 zeroext %cond) {
|
define i64 @select_lea_3(i1 zeroext %cond) {
|
||||||
; CHECK-LABEL: select_lea_3:
|
; CHECK-LABEL: select_lea_3:
|
||||||
; CHECK: # BB#0:
|
; CHECK: # BB#0:
|
||||||
; CHECK-NEXT: testb %dil, %dil
|
; CHECK-NEXT: xorb $1, %dil
|
||||||
; CHECK-NEXT: movl $1, %ecx
|
; CHECK-NEXT: movzbl %dil, %eax
|
||||||
; CHECK-NEXT: movq $-2, %rax
|
; CHECK-NEXT: leaq -2(%rax,%rax,2), %rax
|
||||||
; CHECK-NEXT: cmoveq %rcx, %rax
|
|
||||||
; CHECK-NEXT: retq
|
; CHECK-NEXT: retq
|
||||||
%sel = select i1 %cond, i64 -2, i64 1
|
%sel = select i1 %cond, i64 -2, i64 1
|
||||||
ret i64 %sel
|
ret i64 %sel
|
||||||
|
@ -235,10 +233,9 @@ define i64 @select_lea_3(i1 zeroext %cond) {
|
||||||
define i32 @select_lea_5(i1 zeroext %cond) {
|
define i32 @select_lea_5(i1 zeroext %cond) {
|
||||||
; CHECK-LABEL: select_lea_5:
|
; CHECK-LABEL: select_lea_5:
|
||||||
; CHECK: # BB#0:
|
; CHECK: # BB#0:
|
||||||
; CHECK-NEXT: testb %dil, %dil
|
; CHECK-NEXT: xorb $1, %dil
|
||||||
; CHECK-NEXT: movl $-2, %ecx
|
; CHECK-NEXT: movzbl %dil, %eax
|
||||||
; CHECK-NEXT: movl $3, %eax
|
; CHECK-NEXT: leal -2(%rax,%rax,4), %eax
|
||||||
; CHECK-NEXT: cmovnel %ecx, %eax
|
|
||||||
; CHECK-NEXT: retq
|
; CHECK-NEXT: retq
|
||||||
%sel = select i1 %cond, i32 -2, i32 3
|
%sel = select i1 %cond, i32 -2, i32 3
|
||||||
ret i32 %sel
|
ret i32 %sel
|
||||||
|
@ -247,10 +244,9 @@ define i32 @select_lea_5(i1 zeroext %cond) {
|
||||||
define i64 @select_lea_9(i1 zeroext %cond) {
|
define i64 @select_lea_9(i1 zeroext %cond) {
|
||||||
; CHECK-LABEL: select_lea_9:
|
; CHECK-LABEL: select_lea_9:
|
||||||
; CHECK: # BB#0:
|
; CHECK: # BB#0:
|
||||||
; CHECK-NEXT: testb %dil, %dil
|
; CHECK-NEXT: xorb $1, %dil
|
||||||
; CHECK-NEXT: movl $2, %ecx
|
; CHECK-NEXT: movzbl %dil, %eax
|
||||||
; CHECK-NEXT: movq $-7, %rax
|
; CHECK-NEXT: leaq -7(%rax,%rax,8), %rax
|
||||||
; CHECK-NEXT: cmoveq %rcx, %rax
|
|
||||||
; CHECK-NEXT: retq
|
; CHECK-NEXT: retq
|
||||||
%sel = select i1 %cond, i64 -7, i64 2
|
%sel = select i1 %cond, i64 -7, i64 2
|
||||||
ret i64 %sel
|
ret i64 %sel
|
||||||
|
@ -262,8 +258,8 @@ define i64 @sel_1_2(i64 %x, i64 %y) {
|
||||||
; CHECK-LABEL: sel_1_2:
|
; CHECK-LABEL: sel_1_2:
|
||||||
; CHECK: # BB#0:
|
; CHECK: # BB#0:
|
||||||
; CHECK-NEXT: cmpq $42, %rdi
|
; CHECK-NEXT: cmpq $42, %rdi
|
||||||
; CHECK-NEXT: sbbq %rax, %rax
|
; CHECK-NEXT: sbbq $0, %rsi
|
||||||
; CHECK-NEXT: leaq 2(%rax,%rsi), %rax
|
; CHECK-NEXT: leaq 2(%rsi), %rax
|
||||||
; CHECK-NEXT: retq
|
; CHECK-NEXT: retq
|
||||||
%cmp = icmp ult i64 %x, 42
|
%cmp = icmp ult i64 %x, 42
|
||||||
%sel = select i1 %cmp, i64 1, i64 2
|
%sel = select i1 %cmp, i64 1, i64 2
|
||||||
|
@ -271,30 +267,31 @@ define i64 @sel_1_2(i64 %x, i64 %y) {
|
||||||
ret i64 %sub
|
ret i64 %sub
|
||||||
}
|
}
|
||||||
|
|
||||||
; No LEA with 8-bit or 16-bit, but this shouldn't need branches or cmov.
|
; No LEA with 8-bit, but this shouldn't need branches or cmov.
|
||||||
|
|
||||||
define i8 @sel_1_neg1(i32 %x) {
|
define i8 @sel_1_neg1(i32 %x) {
|
||||||
; CHECK-LABEL: sel_1_neg1:
|
; CHECK-LABEL: sel_1_neg1:
|
||||||
; CHECK: # BB#0:
|
; CHECK: # BB#0:
|
||||||
; CHECK-NEXT: cmpl $42, %edi
|
; CHECK-NEXT: cmpl $42, %edi
|
||||||
; CHECK-NEXT: movb $3, %al
|
; CHECK-NEXT: setg %al
|
||||||
; CHECK-NEXT: jg .LBB23_2
|
; CHECK-NEXT: shlb $2, %al
|
||||||
; CHECK-NEXT: # BB#1:
|
; CHECK-NEXT: decb %al
|
||||||
; CHECK-NEXT: movb $-1, %al
|
|
||||||
; CHECK-NEXT: .LBB23_2:
|
|
||||||
; CHECK-NEXT: retq
|
; CHECK-NEXT: retq
|
||||||
%cmp = icmp sgt i32 %x, 42
|
%cmp = icmp sgt i32 %x, 42
|
||||||
%sel = select i1 %cmp, i8 3, i8 -1
|
%sel = select i1 %cmp, i8 3, i8 -1
|
||||||
ret i8 %sel
|
ret i8 %sel
|
||||||
}
|
}
|
||||||
|
|
||||||
|
; We get an LEA for 16-bit because we ignore the high-bits.
|
||||||
|
|
||||||
define i16 @sel_neg1_1(i32 %x) {
|
define i16 @sel_neg1_1(i32 %x) {
|
||||||
; CHECK-LABEL: sel_neg1_1:
|
; CHECK-LABEL: sel_neg1_1:
|
||||||
; CHECK: # BB#0:
|
; CHECK: # BB#0:
|
||||||
; CHECK-NEXT: cmpl $42, %edi
|
; CHECK-NEXT: xorl %eax, %eax
|
||||||
; CHECK-NEXT: movw $-1, %cx
|
; CHECK-NEXT: cmpl $43, %edi
|
||||||
; CHECK-NEXT: movw $3, %ax
|
; CHECK-NEXT: setl %al
|
||||||
; CHECK-NEXT: cmovgw %cx, %ax
|
; CHECK-NEXT: leal -1(,%rax,4), %eax
|
||||||
|
; CHECK-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
|
||||||
; CHECK-NEXT: retq
|
; CHECK-NEXT: retq
|
||||||
%cmp = icmp sgt i32 %x, 42
|
%cmp = icmp sgt i32 %x, 42
|
||||||
%sel = select i1 %cmp, i16 -1, i16 3
|
%sel = select i1 %cmp, i16 -1, i16 3
|
||||||
|
@ -306,10 +303,10 @@ define i16 @sel_neg1_1(i32 %x) {
|
||||||
define i32 @sel_1_neg1_32(i32 %x) {
|
define i32 @sel_1_neg1_32(i32 %x) {
|
||||||
; CHECK-LABEL: sel_1_neg1_32:
|
; CHECK-LABEL: sel_1_neg1_32:
|
||||||
; CHECK: # BB#0:
|
; CHECK: # BB#0:
|
||||||
|
; CHECK-NEXT: xorl %eax, %eax
|
||||||
; CHECK-NEXT: cmpl $42, %edi
|
; CHECK-NEXT: cmpl $42, %edi
|
||||||
; CHECK-NEXT: movl $8, %ecx
|
; CHECK-NEXT: setg %al
|
||||||
; CHECK-NEXT: movl $-1, %eax
|
; CHECK-NEXT: leal -1(%rax,%rax,8), %eax
|
||||||
; CHECK-NEXT: cmovgl %ecx, %eax
|
|
||||||
; CHECK-NEXT: retq
|
; CHECK-NEXT: retq
|
||||||
%cmp = icmp sgt i32 %x, 42
|
%cmp = icmp sgt i32 %x, 42
|
||||||
%sel = select i1 %cmp, i32 8, i32 -1
|
%sel = select i1 %cmp, i32 8, i32 -1
|
||||||
|
@ -319,10 +316,10 @@ define i32 @sel_1_neg1_32(i32 %x) {
|
||||||
define i32 @sel_neg1_1_32(i32 %x) {
|
define i32 @sel_neg1_1_32(i32 %x) {
|
||||||
; CHECK-LABEL: sel_neg1_1_32:
|
; CHECK-LABEL: sel_neg1_1_32:
|
||||||
; CHECK: # BB#0:
|
; CHECK: # BB#0:
|
||||||
; CHECK-NEXT: cmpl $42, %edi
|
; CHECK-NEXT: xorl %eax, %eax
|
||||||
; CHECK-NEXT: movl $-7, %ecx
|
; CHECK-NEXT: cmpl $43, %edi
|
||||||
; CHECK-NEXT: movl $2, %eax
|
; CHECK-NEXT: setl %al
|
||||||
; CHECK-NEXT: cmovgl %ecx, %eax
|
; CHECK-NEXT: leal -7(%rax,%rax,8), %eax
|
||||||
; CHECK-NEXT: retq
|
; CHECK-NEXT: retq
|
||||||
%cmp = icmp sgt i32 %x, 42
|
%cmp = icmp sgt i32 %x, 42
|
||||||
%sel = select i1 %cmp, i32 -7, i32 2
|
%sel = select i1 %cmp, i32 -7, i32 2
|
||||||
|
@ -336,12 +333,9 @@ define i32 @sel_neg1_1_32(i32 %x) {
|
||||||
define i8 @select_pow2_diff(i1 zeroext %cond) {
|
define i8 @select_pow2_diff(i1 zeroext %cond) {
|
||||||
; CHECK-LABEL: select_pow2_diff:
|
; CHECK-LABEL: select_pow2_diff:
|
||||||
; CHECK: # BB#0:
|
; CHECK: # BB#0:
|
||||||
; CHECK-NEXT: testb %dil, %dil
|
; CHECK-NEXT: shlb $4, %dil
|
||||||
; CHECK-NEXT: movb $19, %al
|
; CHECK-NEXT: orb $3, %dil
|
||||||
; CHECK-NEXT: jne .LBB27_2
|
; CHECK-NEXT: movl %edi, %eax
|
||||||
; CHECK-NEXT: # BB#1:
|
|
||||||
; CHECK-NEXT: movb $3, %al
|
|
||||||
; CHECK-NEXT: .LBB27_2:
|
|
||||||
; CHECK-NEXT: retq
|
; CHECK-NEXT: retq
|
||||||
%sel = select i1 %cond, i8 19, i8 3
|
%sel = select i1 %cond, i8 19, i8 3
|
||||||
ret i8 %sel
|
ret i8 %sel
|
||||||
|
@ -350,10 +344,11 @@ define i8 @select_pow2_diff(i1 zeroext %cond) {
|
||||||
define i16 @select_pow2_diff_invert(i1 zeroext %cond) {
|
define i16 @select_pow2_diff_invert(i1 zeroext %cond) {
|
||||||
; CHECK-LABEL: select_pow2_diff_invert:
|
; CHECK-LABEL: select_pow2_diff_invert:
|
||||||
; CHECK: # BB#0:
|
; CHECK: # BB#0:
|
||||||
; CHECK-NEXT: testb %dil, %dil
|
; CHECK-NEXT: xorb $1, %dil
|
||||||
; CHECK-NEXT: movw $7, %cx
|
; CHECK-NEXT: movzbl %dil, %eax
|
||||||
; CHECK-NEXT: movw $71, %ax
|
; CHECK-NEXT: shll $6, %eax
|
||||||
; CHECK-NEXT: cmovnew %cx, %ax
|
; CHECK-NEXT: orl $7, %eax
|
||||||
|
; CHECK-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
|
||||||
; CHECK-NEXT: retq
|
; CHECK-NEXT: retq
|
||||||
%sel = select i1 %cond, i16 7, i16 71
|
%sel = select i1 %cond, i16 7, i16 71
|
||||||
ret i16 %sel
|
ret i16 %sel
|
||||||
|
@ -362,10 +357,9 @@ define i16 @select_pow2_diff_invert(i1 zeroext %cond) {
|
||||||
define i32 @select_pow2_diff_neg(i1 zeroext %cond) {
|
define i32 @select_pow2_diff_neg(i1 zeroext %cond) {
|
||||||
; CHECK-LABEL: select_pow2_diff_neg:
|
; CHECK-LABEL: select_pow2_diff_neg:
|
||||||
; CHECK: # BB#0:
|
; CHECK: # BB#0:
|
||||||
; CHECK-NEXT: testb %dil, %dil
|
; CHECK-NEXT: shlb $4, %dil
|
||||||
; CHECK-NEXT: movl $-9, %ecx
|
; CHECK-NEXT: movzbl %dil, %eax
|
||||||
; CHECK-NEXT: movl $-25, %eax
|
; CHECK-NEXT: orl $-25, %eax
|
||||||
; CHECK-NEXT: cmovnel %ecx, %eax
|
|
||||||
; CHECK-NEXT: retq
|
; CHECK-NEXT: retq
|
||||||
%sel = select i1 %cond, i32 -9, i32 -25
|
%sel = select i1 %cond, i32 -9, i32 -25
|
||||||
ret i32 %sel
|
ret i32 %sel
|
||||||
|
@ -374,10 +368,10 @@ define i32 @select_pow2_diff_neg(i1 zeroext %cond) {
|
||||||
define i64 @select_pow2_diff_neg_invert(i1 zeroext %cond) {
|
define i64 @select_pow2_diff_neg_invert(i1 zeroext %cond) {
|
||||||
; CHECK-LABEL: select_pow2_diff_neg_invert:
|
; CHECK-LABEL: select_pow2_diff_neg_invert:
|
||||||
; CHECK: # BB#0:
|
; CHECK: # BB#0:
|
||||||
; CHECK-NEXT: testb %dil, %dil
|
; CHECK-NEXT: xorb $1, %dil
|
||||||
; CHECK-NEXT: movl $29, %ecx
|
; CHECK-NEXT: movzbl %dil, %eax
|
||||||
; CHECK-NEXT: movq $-99, %rax
|
; CHECK-NEXT: shlq $7, %rax
|
||||||
; CHECK-NEXT: cmoveq %rcx, %rax
|
; CHECK-NEXT: addq $-99, %rax
|
||||||
; CHECK-NEXT: retq
|
; CHECK-NEXT: retq
|
||||||
%sel = select i1 %cond, i64 -99, i64 29
|
%sel = select i1 %cond, i64 -99, i64 29
|
||||||
ret i64 %sel
|
ret i64 %sel
|
||||||
|
|
|
@ -51,8 +51,10 @@ define i32 @t3() nounwind readonly {
|
||||||
;
|
;
|
||||||
; X64-LABEL: t3:
|
; X64-LABEL: t3:
|
||||||
; X64: # BB#0: # %entry
|
; X64: # BB#0: # %entry
|
||||||
; X64-NEXT: cmpl $1, %eax
|
; X64-NEXT: xorl %eax, %eax
|
||||||
; X64-NEXT: sbbq %rax, %rax
|
; X64-NEXT: testl %eax, %eax
|
||||||
|
; X64-NEXT: sete %al
|
||||||
|
; X64-NEXT: negq %rax
|
||||||
; X64-NEXT: cmpq %rax, %rax
|
; X64-NEXT: cmpq %rax, %rax
|
||||||
; X64-NEXT: xorl %eax, %eax
|
; X64-NEXT: xorl %eax, %eax
|
||||||
; X64-NEXT: retq
|
; X64-NEXT: retq
|
||||||
|
@ -75,13 +77,11 @@ if.end:
|
||||||
define i32 @t4(i64 %x) nounwind readnone ssp {
|
define i32 @t4(i64 %x) nounwind readnone ssp {
|
||||||
; X32-LABEL: t4:
|
; X32-LABEL: t4:
|
||||||
; X32: # BB#0:
|
; X32: # BB#0:
|
||||||
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
|
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
|
||||||
; X32-NEXT: orl {{[0-9]+}}(%esp), %eax
|
|
||||||
; X32-NEXT: movl $-1, %eax
|
|
||||||
; X32-NEXT: je .LBB3_2
|
|
||||||
; X32-NEXT: # BB#1:
|
|
||||||
; X32-NEXT: xorl %eax, %eax
|
; X32-NEXT: xorl %eax, %eax
|
||||||
; X32-NEXT: .LBB3_2:
|
; X32-NEXT: orl {{[0-9]+}}(%esp), %ecx
|
||||||
|
; X32-NEXT: sete %al
|
||||||
|
; X32-NEXT: negl %eax
|
||||||
; X32-NEXT: retl
|
; X32-NEXT: retl
|
||||||
;
|
;
|
||||||
; X64-LABEL: t4:
|
; X64-LABEL: t4:
|
||||||
|
|
Loading…
Reference in New Issue