From 207f32948b2408bebd5a523695f6f7c08049db74 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Mon, 18 Jan 2021 10:29:08 +0000 Subject: [PATCH] [DAG] SimplifyDemandedBits - use KnownBits comparisons to remove ISD::UMIN/UMAX ops Use the KnownBits icmp comparisons to determine when an ISD::UMIN/UMAX op is unnecessary, i.e. when one operand is known to be ULT/ULE (or UGT/UGE) relative to the other. Differential Revision: https://reviews.llvm.org/D94532 --- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 4 + .../CodeGen/SelectionDAG/TargetLowering.cpp | 26 + .../CodeGen/AMDGPU/r600-legalize-umax-bug.ll | 2 +- llvm/test/CodeGen/X86/combine-umin.ll | 11 +- llvm/test/CodeGen/X86/sdiv_fix_sat.ll | 542 ++++++++---------- llvm/test/CodeGen/X86/udiv_fix_sat.ll | 156 +++-- 6 files changed, 347 insertions(+), 394 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index e265bcea5945..ef83df8bdd96 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -4607,6 +4607,10 @@ SDValue DAGCombiner::visitIMINMAX(SDNode *N) { return DAG.getNode(AltOpcode, SDLoc(N), VT, N0, N1); } + // Simplify the operands using demanded-bits information. + if (SimplifyDemandedBits(SDValue(N, 0))) + return SDValue(N, 0); + return SDValue(); } diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 21953373b745..b19033e3e427 100644 --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -1722,6 +1722,32 @@ bool TargetLowering::SimplifyDemandedBits( } break; } + case ISD::UMIN: { + // Check if one arg is always less than (or equal) to the other arg. 
+ SDValue Op0 = Op.getOperand(0); + SDValue Op1 = Op.getOperand(1); + KnownBits Known0 = TLO.DAG.computeKnownBits(Op0, DemandedElts, Depth + 1); + KnownBits Known1 = TLO.DAG.computeKnownBits(Op1, DemandedElts, Depth + 1); + Known = KnownBits::umin(Known0, Known1); + if (Optional IsULE = KnownBits::ule(Known0, Known1)) + return TLO.CombineTo(Op, IsULE.getValue() ? Op0 : Op1); + if (Optional IsULT = KnownBits::ult(Known0, Known1)) + return TLO.CombineTo(Op, IsULT.getValue() ? Op0 : Op1); + break; + } + case ISD::UMAX: { + // Check if one arg is always greater than (or equal) to the other arg. + SDValue Op0 = Op.getOperand(0); + SDValue Op1 = Op.getOperand(1); + KnownBits Known0 = TLO.DAG.computeKnownBits(Op0, DemandedElts, Depth + 1); + KnownBits Known1 = TLO.DAG.computeKnownBits(Op1, DemandedElts, Depth + 1); + Known = KnownBits::umax(Known0, Known1); + if (Optional IsUGE = KnownBits::uge(Known0, Known1)) + return TLO.CombineTo(Op, IsUGE.getValue() ? Op0 : Op1); + if (Optional IsUGT = KnownBits::ugt(Known0, Known1)) + return TLO.CombineTo(Op, IsUGT.getValue() ? 
Op0 : Op1); + break; + } case ISD::BITREVERSE: { SDValue Src = Op.getOperand(0); APInt DemandedSrcBits = DemandedBits.reverseBits(); diff --git a/llvm/test/CodeGen/AMDGPU/r600-legalize-umax-bug.ll b/llvm/test/CodeGen/AMDGPU/r600-legalize-umax-bug.ll index b4cd36daad65..f0604c7fe782 100644 --- a/llvm/test/CodeGen/AMDGPU/r600-legalize-umax-bug.ll +++ b/llvm/test/CodeGen/AMDGPU/r600-legalize-umax-bug.ll @@ -18,7 +18,7 @@ define amdgpu_kernel void @test(i64 addrspace(1)* %out) { ; CHECK-NEXT: 2(2.802597e-45), 0(0.000000e+00) ; CHECK-NEXT: MOV * T0.W, KC0[2].Y, ; CHECK-NEXT: ALU clause starting at 11: -; CHECK-NEXT: MAX_UINT T0.X, T0.X, literal.x, +; CHECK-NEXT: MOV T0.X, literal.x, ; CHECK-NEXT: MOV T0.Y, 0.0, ; CHECK-NEXT: LSHR * T1.X, T0.W, literal.y, ; CHECK-NEXT: 4(5.605194e-45), 2(2.802597e-45) diff --git a/llvm/test/CodeGen/X86/combine-umin.ll b/llvm/test/CodeGen/X86/combine-umin.ll index b22c45bbce45..1be72ad66799 100644 --- a/llvm/test/CodeGen/X86/combine-umin.ll +++ b/llvm/test/CodeGen/X86/combine-umin.ll @@ -10,14 +10,9 @@ define i8 @test_demandedbits_umin_ult(i8 %a0, i8 %a1) { ; CHECK-LABEL: test_demandedbits_umin_ult: ; CHECK: # %bb.0: -; CHECK-NEXT: orb $12, %dil -; CHECK-NEXT: orb $4, %sil -; CHECK-NEXT: andb $13, %dil -; CHECK-NEXT: andb $12, %sil -; CHECK-NEXT: movzbl %dil, %ecx -; CHECK-NEXT: movzbl %sil, %eax -; CHECK-NEXT: cmpb %al, %cl -; CHECK-NEXT: cmovbl %ecx, %eax +; CHECK-NEXT: movl %esi, %eax +; CHECK-NEXT: orb $4, %al +; CHECK-NEXT: andb $12, %al ; CHECK-NEXT: # kill: def $al killed $al killed $eax ; CHECK-NEXT: retq %lhs0 = and i8 %a0, 13 ; b1101 diff --git a/llvm/test/CodeGen/X86/sdiv_fix_sat.ll b/llvm/test/CodeGen/X86/sdiv_fix_sat.ll index 617d5d7876bd..9801cb4018b9 100644 --- a/llvm/test/CodeGen/X86/sdiv_fix_sat.ll +++ b/llvm/test/CodeGen/X86/sdiv_fix_sat.ll @@ -313,56 +313,50 @@ define i64 @func5(i64 %x, i64 %y) nounwind { ; X64-NEXT: movq %rsi, (%rsp) # 8-byte Spill ; X64-NEXT: movq %rdi, %r15 ; X64-NEXT: leaq (%rdi,%rdi), %rax -; 
X64-NEXT: movq %rdi, %r12 -; X64-NEXT: sarq $63, %r12 -; X64-NEXT: shldq $31, %rax, %r12 +; X64-NEXT: movq %rdi, %rbx +; X64-NEXT: sarq $63, %rbx +; X64-NEXT: shldq $31, %rax, %rbx ; X64-NEXT: shlq $32, %r15 -; X64-NEXT: movq %rsi, %r13 -; X64-NEXT: sarq $63, %r13 +; X64-NEXT: movq %rsi, %r12 +; X64-NEXT: sarq $63, %r12 ; X64-NEXT: movq %r15, %rdi -; X64-NEXT: movq %r12, %rsi -; X64-NEXT: movq %r13, %rcx +; X64-NEXT: movq %rbx, %rsi +; X64-NEXT: movq %r12, %rcx ; X64-NEXT: callq __divti3@PLT -; X64-NEXT: movq %rax, %rbx +; X64-NEXT: movq %rax, %r13 ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: movq %rdx, %rbp ; X64-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: subq $1, %rbx +; X64-NEXT: subq $1, %r13 ; X64-NEXT: sbbq $0, %rbp -; X64-NEXT: testq %r12, %r12 +; X64-NEXT: testq %rbx, %rbx ; X64-NEXT: sets %al -; X64-NEXT: testq %r13, %r13 +; X64-NEXT: testq %r12, %r12 ; X64-NEXT: sets %r14b ; X64-NEXT: xorb %al, %r14b ; X64-NEXT: movq %r15, %rdi -; X64-NEXT: movq %r12, %rsi +; X64-NEXT: movq %rbx, %rsi ; X64-NEXT: movq (%rsp), %rdx # 8-byte Reload -; X64-NEXT: movq %r13, %rcx +; X64-NEXT: movq %r12, %rcx ; X64-NEXT: callq __modti3@PLT ; X64-NEXT: orq %rax, %rdx ; X64-NEXT: setne %al ; X64-NEXT: testb %r14b, %al ; X64-NEXT: cmoveq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Folded Reload -; X64-NEXT: cmoveq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Folded Reload -; X64-NEXT: cmpq $-1, %rbx -; X64-NEXT: movq $-1, %rax +; X64-NEXT: cmoveq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Folded Reload +; X64-NEXT: xorl %eax, %eax +; X64-NEXT: testq %rbp, %rbp ; X64-NEXT: movq $-1, %rcx -; X64-NEXT: cmovbq %rbx, %rcx -; X64-NEXT: xorl %edx, %edx +; X64-NEXT: movq $-1, %rdx +; X64-NEXT: cmovsq %r13, %rdx +; X64-NEXT: cmoveq %r13, %rdx +; X64-NEXT: cmovnsq %rax, %rbp ; X64-NEXT: testq %rbp, %rbp -; X64-NEXT: cmovnsq %rax, %rbx -; X64-NEXT: cmoveq %rcx, %rbx -; X64-NEXT: cmovnsq %rdx, %rbp -; X64-NEXT: testq %rbx, %rbx -; X64-NEXT: movl 
$0, %ecx -; X64-NEXT: cmovaq %rbx, %rcx -; X64-NEXT: testq %rbp, %rbp -; X64-NEXT: cmovnsq %rbp, %rax -; X64-NEXT: cmovsq %rdx, %rbx +; X64-NEXT: cmovnsq %rbp, %rcx +; X64-NEXT: cmovnsq %rdx, %rax ; X64-NEXT: cmpq $-1, %rbp -; X64-NEXT: cmoveq %rcx, %rbx -; X64-NEXT: shrdq $1, %rax, %rbx -; X64-NEXT: movq %rbx, %rax +; X64-NEXT: cmoveq %rdx, %rax +; X64-NEXT: shrdq $1, %rcx, %rax ; X64-NEXT: addq $24, %rsp ; X64-NEXT: popq %rbx ; X64-NEXT: popq %r12 @@ -383,12 +377,12 @@ define i64 @func5(i64 %x, i64 %y) nounwind { ; X86-NEXT: subl $88, %esp ; X86-NEXT: movl 8(%ebp), %ecx ; X86-NEXT: movl 12(%ebp), %eax -; X86-NEXT: movl 20(%ebp), %ebx -; X86-NEXT: sarl $31, %ebx -; X86-NEXT: movl %eax, %edi +; X86-NEXT: movl 20(%ebp), %edi ; X86-NEXT: sarl $31, %edi -; X86-NEXT: movl %edi, %edx -; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl %eax, %ebx +; X86-NEXT: sarl $31, %ebx +; X86-NEXT: movl %ebx, %edx +; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: shldl $31, %eax, %edx ; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: shldl $31, %ecx, %eax @@ -397,42 +391,42 @@ define i64 @func5(i64 %x, i64 %y) nounwind { ; X86-NEXT: shll $31, %ecx ; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: leal {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: pushl %ebx -; X86-NEXT: pushl %ebx +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %edi ; X86-NEXT: pushl 20(%ebp) ; X86-NEXT: pushl 16(%ebp) -; X86-NEXT: pushl %edi +; X86-NEXT: pushl %ebx ; X86-NEXT: pushl %edx ; X86-NEXT: pushl %esi ; X86-NEXT: pushl %ecx ; X86-NEXT: pushl %eax ; X86-NEXT: calll __divti3 ; X86-NEXT: addl $32, %esp +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-NEXT: 
movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: subl $1, %esi -; X86-NEXT: sbbl $0, %edi +; X86-NEXT: subl $1, %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: sbbl $0, %esi +; X86-NEXT: movl %ecx, %eax ; X86-NEXT: sbbl $0, %eax ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx ; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: sbbl $0, %ebx -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X86-NEXT: testl %edx, %edx +; X86-NEXT: testl %edi, %edi ; X86-NEXT: sets %al ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X86-NEXT: testl %ecx, %ecx -; X86-NEXT: sets %ah -; X86-NEXT: xorb %al, %ah -; X86-NEXT: movb %ah, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill +; X86-NEXT: sets %dl +; X86-NEXT: xorb %al, %dl +; X86-NEXT: movb %dl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill ; X86-NEXT: leal {{[0-9]+}}(%esp), %eax -; X86-NEXT: pushl %edx -; X86-NEXT: pushl %edx +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %edi ; X86-NEXT: pushl 20(%ebp) ; X86-NEXT: pushl 16(%ebp) ; X86-NEXT: pushl %ecx @@ -449,59 +443,54 @@ define i64 @func5(i64 %x, i64 %y) nounwind { ; X86-NEXT: orl %eax, %ecx ; X86-NEXT: setne %al ; X86-NEXT: testb %al, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload ; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload ; X86-NEXT: 
cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload ; X86-NEXT: testl %ebx, %ebx -; X86-NEXT: movl $0, %ecx -; X86-NEXT: cmovsl %ebx, %ecx -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl $-1, %ecx -; X86-NEXT: cmovsl %esi, %ecx -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl $2147483647, %ecx # imm = 0x7FFFFFFF -; X86-NEXT: cmovsl %edi, %ecx -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl %ebx, %edx -; X86-NEXT: sarl $31, %edx -; X86-NEXT: andl %eax, %edx +; X86-NEXT: movl $0, %edx +; X86-NEXT: cmovsl %ebx, %edx +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl $2147483647, %edx # imm = 0x7FFFFFFF +; X86-NEXT: cmovsl %esi, %edx +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl $-1, %edx +; X86-NEXT: cmovsl %eax, %edx +; X86-NEXT: movl %ebx, %edi +; X86-NEXT: sarl $31, %edi +; X86-NEXT: andl %ecx, %edi ; X86-NEXT: testl %ebx, %ebx -; X86-NEXT: cmovel %ebx, %edx -; X86-NEXT: cmpl $-1, %esi -; X86-NEXT: movl $-1, %eax -; X86-NEXT: cmovbl %esi, %eax -; X86-NEXT: cmpl $2147483647, %edi # imm = 0x7FFFFFFF -; X86-NEXT: movl $-1, %ecx -; X86-NEXT: cmovael %ecx, %esi -; X86-NEXT: cmovel %eax, %esi +; X86-NEXT: cmovel %ebx, %edi +; X86-NEXT: cmpl $2147483647, %esi # imm = 0x7FFFFFFF ; X86-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF -; X86-NEXT: cmovael %eax, %edi +; X86-NEXT: cmovael %eax, %esi +; X86-NEXT: movl $-1, %eax +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: cmovbl %ecx, %eax +; X86-NEXT: cmovel %ecx, %eax ; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X86-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X86-NEXT: cmovnel %edx, %eax ; X86-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X86-NEXT: testl %esi, %esi -; X86-NEXT: movl $0, %eax -; X86-NEXT: cmoval %esi, %eax -; X86-NEXT: cmpl 
$-2147483648, %edi # imm = 0x80000000 +; X86-NEXT: cmpl $-2147483648, %esi # imm = 0x80000000 ; X86-NEXT: movl $0, %ecx -; X86-NEXT: cmoval %esi, %ecx +; X86-NEXT: cmoval %eax, %ecx ; X86-NEXT: cmovel %eax, %ecx -; X86-NEXT: movl $-2147483648, %eax # imm = 0x80000000 -; X86-NEXT: cmoval %edi, %eax +; X86-NEXT: movl $-2147483648, %edx # imm = 0x80000000 +; X86-NEXT: cmoval %esi, %edx ; X86-NEXT: cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload -; X86-NEXT: movl $-2147483648, %ebx # imm = 0x80000000 -; X86-NEXT: cmovsl %ebx, %edi ; X86-NEXT: movl $0, %ebx +; X86-NEXT: cmovsl %ebx, %eax +; X86-NEXT: movl $-2147483648, %ebx # imm = 0x80000000 ; X86-NEXT: cmovsl %ebx, %esi -; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X86-NEXT: cmpl $-1, %edx -; X86-NEXT: cmovel %ecx, %esi -; X86-NEXT: cmovel %eax, %edi -; X86-NEXT: movl %esi, %eax -; X86-NEXT: movl %edi, %edx +; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X86-NEXT: cmpl $-1, %edi +; X86-NEXT: cmovel %edx, %esi +; X86-NEXT: cmovel %ecx, %eax +; X86-NEXT: movl %esi, %edx ; X86-NEXT: leal -12(%ebp), %esp ; X86-NEXT: popl %esi ; X86-NEXT: popl %edi @@ -1054,8 +1043,8 @@ define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind { ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: sbbl $0, %eax ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-NEXT: movl %edi, %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: movl %esi, %eax ; X86-NEXT: sbbl $0, %eax ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: testl %ebx, %ebx @@ -1063,11 +1052,11 @@ define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind { ; X86-NEXT: cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload ; X86-NEXT: sets %bh ; X86-NEXT: xorb %bl, %bh -; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: orl {{[0-9]+}}(%esp), %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %edi +; 
X86-NEXT: orl {{[0-9]+}}(%esp), %edi ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: orl {{[0-9]+}}(%esp), %eax -; X86-NEXT: orl %esi, %eax +; X86-NEXT: orl %edi, %eax ; X86-NEXT: setne %al ; X86-NEXT: testb %bh, %al ; X86-NEXT: cmovel %edx, %ecx @@ -1079,20 +1068,19 @@ define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind { ; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: cmovel %edi, %eax +; X86-NEXT: cmovel %esi, %eax ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: subl $1, %eax ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-NEXT: movl %edi, %eax -; X86-NEXT: sbbl $0, %eax -; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl %edi, %esi +; X86-NEXT: sbbl $0, %esi ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: sbbl $0, %eax -; X86-NEXT: movl %eax, %esi +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: sbbl $0, %eax @@ -1123,20 +1111,20 @@ define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind { ; X86-NEXT: orl %eax, %ecx ; X86-NEXT: setne %al ; X86-NEXT: testb %bl, %al -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: cmovel %edi, %eax -; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 
4-byte Folded Reload +; X86-NEXT: cmovel %edi, %esi ; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: subl $1, %edx +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: subl $1, %ecx ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-NEXT: movl %esi, %eax ; X86-NEXT: sbbl $0, %eax @@ -1145,19 +1133,19 @@ define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind { ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: sbbl $0, %eax ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: sbbl $0, %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: sbbl $0, %edx ; X86-NEXT: cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload ; X86-NEXT: sets %bl ; X86-NEXT: cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload ; X86-NEXT: sets %bh ; X86-NEXT: xorb %bl, %bh -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: orl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %edi ; X86-NEXT: orl {{[0-9]+}}(%esp), %edi -; X86-NEXT: orl %eax, %edi +; X86-NEXT: movl 
{{[0-9]+}}(%esp), %eax +; X86-NEXT: orl {{[0-9]+}}(%esp), %eax +; X86-NEXT: orl %edi, %eax ; X86-NEXT: setne %al ; X86-NEXT: testb %bh, %al ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload @@ -1166,219 +1154,179 @@ define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind { ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X86-NEXT: testl %ecx, %ecx +; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X86-NEXT: testl %edx, %edx ; X86-NEXT: movl $0, %eax -; X86-NEXT: cmovsl %ecx, %eax -; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl $-1, %eax ; X86-NEXT: cmovsl %edx, %eax ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl $-1, %eax +; X86-NEXT: cmovsl %ecx, %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: movl %eax, %edx -; X86-NEXT: sarl $31, %edx -; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl %edx, %esi +; X86-NEXT: movl %eax, %ecx +; X86-NEXT: sarl $31, %ecx +; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl %ecx, %esi ; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload ; X86-NEXT: testl %eax, %eax ; X86-NEXT: cmovel %eax, %esi ; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl $0, %edx -; X86-NEXT: cmovsl %eax, %edx -; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl $-1, %eax -; X86-NEXT: cmovsl 
{{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: movl %eax, %ebx -; X86-NEXT: sarl $31, %ebx -; X86-NEXT: movl %ebx, %edx -; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X86-NEXT: testl %eax, %eax -; X86-NEXT: cmovel %eax, %edx -; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl $0, %edx -; X86-NEXT: cmovsl %eax, %edx -; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl $0, %ecx +; X86-NEXT: cmovsl %eax, %ecx +; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: movl $-1, %eax ; X86-NEXT: cmovsl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X86-NEXT: movl %eax, %edi ; X86-NEXT: sarl $31, %edi -; X86-NEXT: movl %edi, %edx -; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X86-NEXT: movl %edi, %ecx +; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload ; X86-NEXT: testl %eax, %eax -; X86-NEXT: cmovel %eax, %edx -; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl $0, %esi -; X86-NEXT: cmovsl %eax, %esi -; X86-NEXT: movl $-1, %eax -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X86-NEXT: cmovsl %edx, %eax -; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: cmovel %eax, %ecx ; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl %ecx, %eax +; X86-NEXT: movl $0, %ecx +; X86-NEXT: cmovsl %eax, %ecx +; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl $-1, %eax +; X86-NEXT: cmovsl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 
{{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X86-NEXT: movl %esi, %ebx +; X86-NEXT: sarl $31, %ebx +; X86-NEXT: movl %ebx, %eax +; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X86-NEXT: testl %esi, %esi +; X86-NEXT: cmovel %esi, %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl $0, %ecx +; X86-NEXT: cmovsl %esi, %ecx +; X86-NEXT: movl $-1, %eax +; X86-NEXT: cmovsl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl %edx, %eax ; X86-NEXT: sarl $31, %eax ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X86-NEXT: testl %ecx, %ecx -; X86-NEXT: cmovel %ecx, %eax +; X86-NEXT: testl %edx, %edx +; X86-NEXT: cmovel %edx, %eax ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl %edx, %ecx -; X86-NEXT: cmpl $-1, %edx -; X86-NEXT: movl $-1, %eax -; X86-NEXT: cmovael %eax, %ecx -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X86-NEXT: cmpl $1, %edx -; X86-NEXT: movl $0, %eax -; X86-NEXT: sbbl %eax, %eax -; X86-NEXT: notl %eax -; X86-NEXT: orl %ecx, %eax -; X86-NEXT: testl %edx, %edx -; X86-NEXT: movl $0, %ecx -; X86-NEXT: cmovbl %edx, %ecx -; X86-NEXT: andl %edx, %edi -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X86-NEXT: cmovel %ecx, %edi -; X86-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X86-NEXT: testl %eax, %eax -; X86-NEXT: movl $0, %ecx -; X86-NEXT: cmoval %eax, %ecx -; X86-NEXT: cmpl $-1, %edi -; X86-NEXT: movl $0, %edx -; X86-NEXT: cmovnel %edx, %ecx -; X86-NEXT: testl %esi, %esi -; X86-NEXT: movl $-1, %edx -; X86-NEXT: cmovsl %edx, %edi -; X86-NEXT: movl $0, %edx -; X86-NEXT: cmovsl %edx, %eax -; 
X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X86-NEXT: cmpl $-1, %esi -; X86-NEXT: cmovel %ecx, %eax -; X86-NEXT: cmovnel %edi, %esi -; X86-NEXT: shldl $31, %eax, %esi -; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: cmpl $-1, %eax -; X86-NEXT: movl $-1, %ecx -; X86-NEXT: cmovael %ecx, %eax -; X86-NEXT: movl %eax, %ecx -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X86-NEXT: cmpl $1, %esi -; X86-NEXT: movl $0, %eax -; X86-NEXT: sbbl %eax, %eax -; X86-NEXT: notl %eax -; X86-NEXT: orl %ecx, %eax -; X86-NEXT: testl %esi, %esi -; X86-NEXT: movl $0, %ecx -; X86-NEXT: cmovbl %esi, %ecx -; X86-NEXT: andl %esi, %ebx -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X86-NEXT: andl %eax, %ebx +; X86-NEXT: cmpl $1, %eax +; X86-NEXT: movl $0, %edx +; X86-NEXT: sbbl %edx, %edx +; X86-NEXT: notl %edx ; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X86-NEXT: cmovel %ecx, %ebx -; X86-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X86-NEXT: testl %eax, %eax -; X86-NEXT: movl $0, %ecx -; X86-NEXT: cmoval %eax, %ecx +; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X86-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X86-NEXT: movl $0, %esi +; X86-NEXT: cmovel %esi, %ebx ; X86-NEXT: cmpl $-1, %ebx -; X86-NEXT: movl $0, %edi -; X86-NEXT: cmovnel %edi, %ecx -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X86-NEXT: testl %esi, %esi -; X86-NEXT: movl $-1, %edx -; X86-NEXT: cmovsl %edx, %ebx -; X86-NEXT: cmovsl %edi, %eax -; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X86-NEXT: cmpl $-1, %esi -; X86-NEXT: cmovel %ecx, %eax -; X86-NEXT: cmovnel %ebx, %esi -; X86-NEXT: shldl $31, %eax, %esi -; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl 
{{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: cmpl $-1, %eax -; X86-NEXT: cmovael %edx, %eax -; X86-NEXT: movl $-1, %ebx -; X86-NEXT: movl %eax, %ecx -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X86-NEXT: cmpl $1, %edx ; X86-NEXT: movl $0, %eax -; X86-NEXT: sbbl %eax, %eax -; X86-NEXT: notl %eax -; X86-NEXT: orl %ecx, %eax -; X86-NEXT: testl %edx, %edx -; X86-NEXT: movl $0, %ecx -; X86-NEXT: cmovbl %edx, %ecx -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X86-NEXT: andl %edx, %edi -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X86-NEXT: cmovel %ecx, %edi -; X86-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X86-NEXT: testl %eax, %eax -; X86-NEXT: movl $0, %ecx -; X86-NEXT: cmoval %eax, %ecx -; X86-NEXT: cmpl $-1, %edi -; X86-NEXT: movl $0, %edx -; X86-NEXT: cmovnel %edx, %ecx -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X86-NEXT: testl %esi, %esi -; X86-NEXT: cmovsl %ebx, %edi -; X86-NEXT: cmovsl %edx, %eax -; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X86-NEXT: cmpl $-1, %esi -; X86-NEXT: cmovel %ecx, %eax -; X86-NEXT: cmovnel %edi, %esi -; X86-NEXT: shldl $31, %eax, %esi -; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: cmpl $-1, %eax -; X86-NEXT: cmovael %ebx, %eax +; X86-NEXT: cmovel %edx, %eax +; X86-NEXT: testl %ecx, %ecx +; X86-NEXT: cmovsl %esi, %edx ; X86-NEXT: movl $-1, %esi -; X86-NEXT: movl %eax, %ecx -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X86-NEXT: cmpl $1, %edx -; X86-NEXT: movl $0, %eax -; X86-NEXT: sbbl %eax, %eax -; X86-NEXT: notl %eax -; X86-NEXT: orl %ecx, %eax -; X86-NEXT: testl %edx, %edx -; X86-NEXT: movl $0, %ecx -; X86-NEXT: cmovbl %edx, %ecx -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte 
Reload -; X86-NEXT: andl %edx, %ebx -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X86-NEXT: cmovel %ecx, %ebx -; X86-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X86-NEXT: testl %eax, %eax -; X86-NEXT: movl $0, %ecx -; X86-NEXT: cmoval %eax, %ecx -; X86-NEXT: cmpl $-1, %ebx -; X86-NEXT: movl $0, %edi -; X86-NEXT: cmovnel %edi, %ecx -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X86-NEXT: testl %edx, %edx ; X86-NEXT: cmovsl %esi, %ebx -; X86-NEXT: movl %ebx, %esi -; X86-NEXT: cmovsl %edi, %eax -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X86-NEXT: andl %edx, %ebx -; X86-NEXT: cmpl $-1, %ebx +; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X86-NEXT: cmpl $-1, %ecx +; X86-NEXT: cmovnel %ebx, %ecx +; X86-NEXT: cmovel %eax, %edx +; X86-NEXT: shrdl $1, %ecx, %edx +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: andl %eax, %edi +; X86-NEXT: cmpl $1, %eax +; X86-NEXT: movl $0, %ecx +; X86-NEXT: sbbl %ecx, %ecx +; X86-NEXT: notl %ecx +; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X86-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X86-NEXT: movl $0, %esi +; X86-NEXT: cmovel %esi, %edi +; X86-NEXT: cmpl $-1, %edi +; X86-NEXT: movl $0, %eax ; X86-NEXT: cmovel %ecx, %eax -; X86-NEXT: cmovnel %esi, %ebx -; X86-NEXT: shldl $31, %eax, %ebx +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X86-NEXT: testl %ebx, %ebx +; X86-NEXT: cmovsl %esi, %ecx +; X86-NEXT: movl $-1, %esi +; X86-NEXT: cmovsl %esi, %edi +; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X86-NEXT: cmpl $-1, %ebx +; X86-NEXT: cmovnel %edi, %ebx +; X86-NEXT: cmovel %eax, %ecx +; 
X86-NEXT: shrdl $1, %ebx, %ecx +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X86-NEXT: andl %eax, %edi +; X86-NEXT: cmpl $1, %eax +; X86-NEXT: movl $0, %esi +; X86-NEXT: sbbl %esi, %esi +; X86-NEXT: notl %esi +; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X86-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X86-NEXT: movl %edi, %eax +; X86-NEXT: movl $0, %edi +; X86-NEXT: cmovel %edi, %eax +; X86-NEXT: cmpl $-1, %eax +; X86-NEXT: movl %eax, %edi +; X86-NEXT: movl $0, %eax +; X86-NEXT: cmovel %esi, %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X86-NEXT: testl %ebx, %ebx +; X86-NEXT: movl $0, %eax +; X86-NEXT: cmovsl %eax, %esi +; X86-NEXT: movl $-1, %eax +; X86-NEXT: cmovsl %eax, %edi +; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X86-NEXT: cmpl $-1, %ebx +; X86-NEXT: cmovnel %edi, %ebx +; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X86-NEXT: shrdl $1, %ebx, %esi +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X86-NEXT: andl %eax, %ebx +; X86-NEXT: cmpl $1, %eax +; X86-NEXT: movl $0, %edi +; X86-NEXT: sbbl %edi, %edi +; X86-NEXT: notl %edi +; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X86-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X86-NEXT: movl %ebx, %eax +; X86-NEXT: movl $0, %ebx +; X86-NEXT: cmovel %ebx, %eax +; X86-NEXT: cmpl $-1, %eax +; X86-NEXT: movl $0, %ebx +; X86-NEXT: 
cmovel %edi, %ebx +; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload +; X86-NEXT: movl $0, %ebx +; X86-NEXT: cmovsl %ebx, %edi +; X86-NEXT: movl $-1, %ebx +; X86-NEXT: cmovsl %ebx, %eax +; X86-NEXT: movl %eax, %ebx +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X86-NEXT: cmpl $-1, %eax +; X86-NEXT: cmovnel %ebx, %eax +; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X86-NEXT: shrdl $1, %eax, %edi ; X86-NEXT: movl 8(%ebp), %eax -; X86-NEXT: movl %ebx, 12(%eax) -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X86-NEXT: movl %ecx, 8(%eax) -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: movl %edi, 12(%eax) +; X86-NEXT: movl %esi, 8(%eax) ; X86-NEXT: movl %ecx, 4(%eax) -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X86-NEXT: movl %ecx, (%eax) +; X86-NEXT: movl %edx, (%eax) ; X86-NEXT: leal -12(%ebp), %esp ; X86-NEXT: popl %esi ; X86-NEXT: popl %edi diff --git a/llvm/test/CodeGen/X86/udiv_fix_sat.ll b/llvm/test/CodeGen/X86/udiv_fix_sat.ll index 2be51c3ccbba..0f295a0f7c2a 100644 --- a/llvm/test/CodeGen/X86/udiv_fix_sat.ll +++ b/llvm/test/CodeGen/X86/udiv_fix_sat.ll @@ -180,18 +180,15 @@ define i64 @func5(i64 %x, i64 %y) nounwind { ; X64-NEXT: xorl %ebx, %ebx ; X64-NEXT: xorl %ecx, %ecx ; X64-NEXT: callq __udivti3@PLT -; X64-NEXT: cmpq $-1, %rax -; X64-NEXT: movq $-1, %rcx -; X64-NEXT: cmovbq %rax, %rcx ; X64-NEXT: cmpq $1, %rdx -; X64-NEXT: movl $1, %esi -; X64-NEXT: cmovbq %rdx, %rsi ; X64-NEXT: sbbq %rbx, %rbx ; X64-NEXT: notq %rbx ; X64-NEXT: orq %rax, %rbx ; X64-NEXT: cmpq $1, %rdx -; X64-NEXT: cmoveq %rcx, %rbx -; X64-NEXT: shrdq $1, %rsi, %rbx +; X64-NEXT: movl $1, %ecx +; X64-NEXT: cmovbq %rdx, %rcx +; X64-NEXT: cmoveq %rax, %rbx +; X64-NEXT: shrdq $1, %rcx, %rbx ; X64-NEXT: movq %rbx, %rax ; 
X64-NEXT: popq %rbx ; X64-NEXT: retq @@ -221,18 +218,15 @@ define i64 @func5(i64 %x, i64 %y) nounwind { ; X86-NEXT: pushl %esi ; X86-NEXT: calll __udivti3 ; X86-NEXT: addl $32, %esp -; X86-NEXT: movl (%esp), %eax +; X86-NEXT: movl (%esp), %ecx ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: cmpl $-1, %eax -; X86-NEXT: movl $-1, %ecx -; X86-NEXT: movl $-1, %esi -; X86-NEXT: cmovbl %eax, %esi ; X86-NEXT: cmpl $-1, %edx +; X86-NEXT: movl %ecx, %eax ; X86-NEXT: cmovel %edx, %eax -; X86-NEXT: cmovel %esi, %eax -; X86-NEXT: cmovael %ecx, %edx -; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: orl {{[0-9]+}}(%esp), %esi +; X86-NEXT: cmovel %ecx, %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: orl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl $-1, %ecx ; X86-NEXT: cmovnel %ecx, %edx ; X86-NEXT: cmovnel %ecx, %eax ; X86-NEXT: leal -4(%ebp), %esp @@ -400,8 +394,8 @@ define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind { ; X86-NEXT: pushl %ebx ; X86-NEXT: pushl %edi ; X86-NEXT: pushl %esi -; X86-NEXT: subl $16, %esp -; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: subl $12, %esp +; X86-NEXT: movl {{[0-9]+}}(%esp), %edi ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: xorl %eax, %eax ; X86-NEXT: addl %ecx, %ecx @@ -414,109 +408,95 @@ define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind { ; X86-NEXT: pushl %ecx ; X86-NEXT: calll __udivdi3 ; X86-NEXT: addl $16, %esp -; X86-NEXT: cmpl $-1, %eax -; X86-NEXT: movl $-1, %ecx -; X86-NEXT: cmovbl %eax, %ecx ; X86-NEXT: cmpl $1, %edx -; X86-NEXT: movl $0, %edi -; X86-NEXT: sbbl %edi, %edi -; X86-NEXT: notl %edi -; X86-NEXT: orl %eax, %edi -; X86-NEXT: movl %edi, %ebx -; X86-NEXT: xorl %eax, %eax -; X86-NEXT: addl %esi, %esi -; X86-NEXT: setb %al -; X86-NEXT: cmpl $1, %edx -; X86-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-NEXT: cmovel %ecx, %ebx -; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl $1, %ecx -; X86-NEXT: cmovael %ecx, %edx -; X86-NEXT: movl %edx, 
{{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: shldl $31, %esi, %eax -; X86-NEXT: shll $31, %esi -; X86-NEXT: pushl $0 -; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl %eax -; X86-NEXT: pushl %esi -; X86-NEXT: calll __udivdi3 -; X86-NEXT: addl $16, %esp -; X86-NEXT: cmpl $-1, %eax -; X86-NEXT: movl $-1, %ecx -; X86-NEXT: cmovbl %eax, %ecx -; X86-NEXT: cmpl $1, %edx -; X86-NEXT: movl $1, %esi -; X86-NEXT: cmovbl %edx, %esi -; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl $0, %esi -; X86-NEXT: sbbl %esi, %esi -; X86-NEXT: notl %esi -; X86-NEXT: orl %eax, %esi -; X86-NEXT: xorl %eax, %eax +; X86-NEXT: movl $0, %ecx +; X86-NEXT: sbbl %ecx, %ecx +; X86-NEXT: notl %ecx +; X86-NEXT: orl %eax, %ecx +; X86-NEXT: movl %ecx, %esi +; X86-NEXT: xorl %ecx, %ecx ; X86-NEXT: addl %edi, %edi -; X86-NEXT: setb %al +; X86-NEXT: setb %cl ; X86-NEXT: cmpl $1, %edx -; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp -; X86-NEXT: cmovel %ecx, %esi -; X86-NEXT: shldl $31, %edi, %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx +; X86-NEXT: movl $1, %ebp +; X86-NEXT: cmovael %ebp, %edx +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl $1, %ebp +; X86-NEXT: cmovel %eax, %esi +; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: shldl $31, %edi, %ecx ; X86-NEXT: shll $31, %edi ; X86-NEXT: pushl $0 ; X86-NEXT: pushl {{[0-9]+}}(%esp) -; X86-NEXT: pushl %eax +; X86-NEXT: pushl %ecx ; X86-NEXT: pushl %edi ; X86-NEXT: calll __udivdi3 ; X86-NEXT: addl $16, %esp -; X86-NEXT: cmpl $-1, %eax -; X86-NEXT: movl $-1, %ebx -; X86-NEXT: cmovbl %eax, %ebx ; X86-NEXT: cmpl $1, %edx ; X86-NEXT: movl $0, %edi ; X86-NEXT: sbbl %edi, %edi ; X86-NEXT: notl %edi ; X86-NEXT: orl %eax, %edi ; X86-NEXT: xorl %ecx, %ecx -; X86-NEXT: addl %ebp, %ebp +; X86-NEXT: addl %ebx, %ebx ; X86-NEXT: setb %cl ; X86-NEXT: cmpl $1, %edx -; X86-NEXT: movl %edx, %eax -; X86-NEXT: movl $1, %edx -; X86-NEXT: cmovael %edx, %eax -; X86-NEXT: movl %eax, 
(%esp) # 4-byte Spill -; X86-NEXT: cmovel %ebx, %edi -; X86-NEXT: shldl $31, %ebp, %ecx -; X86-NEXT: shll $31, %ebp +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: cmovael %ebp, %edx +; X86-NEXT: movl %edx, (%esp) # 4-byte Spill +; X86-NEXT: cmovel %eax, %edi +; X86-NEXT: shldl $31, %ebx, %ecx +; X86-NEXT: shll $31, %ebx ; X86-NEXT: pushl $0 ; X86-NEXT: pushl {{[0-9]+}}(%esp) ; X86-NEXT: pushl %ecx -; X86-NEXT: pushl %ebp +; X86-NEXT: pushl %ebx ; X86-NEXT: calll __udivdi3 ; X86-NEXT: addl $16, %esp -; X86-NEXT: cmpl $-1, %eax -; X86-NEXT: movl $-1, %ecx -; X86-NEXT: cmovbl %eax, %ecx +; X86-NEXT: movl %edx, %ebx ; X86-NEXT: cmpl $1, %edx -; X86-NEXT: movl $1, %ebx -; X86-NEXT: cmovbl %edx, %ebx ; X86-NEXT: movl $0, %ebp ; X86-NEXT: sbbl %ebp, %ebp ; X86-NEXT: notl %ebp ; X86-NEXT: orl %eax, %ebp +; X86-NEXT: xorl %ecx, %ecx +; X86-NEXT: addl %esi, %esi +; X86-NEXT: setb %cl ; X86-NEXT: cmpl $1, %edx -; X86-NEXT: cmovel %ecx, %ebp -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: movl $1, %edx +; X86-NEXT: cmovael %edx, %ebx +; X86-NEXT: cmovel %eax, %ebp +; X86-NEXT: shldl $31, %esi, %ecx +; X86-NEXT: shll $31, %esi +; X86-NEXT: pushl $0 +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: pushl %ecx +; X86-NEXT: pushl %esi +; X86-NEXT: calll __udivdi3 +; X86-NEXT: addl $16, %esp +; X86-NEXT: cmpl $1, %edx +; X86-NEXT: movl $0, %ecx +; X86-NEXT: sbbl %ecx, %ecx +; X86-NEXT: notl %ecx +; X86-NEXT: orl %eax, %ecx +; X86-NEXT: cmpl $1, %edx +; X86-NEXT: movl $1, %esi +; X86-NEXT: cmovbl %edx, %esi +; X86-NEXT: cmovel %eax, %ecx +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: shrdl $1, %eax, %ecx -; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NEXT: shrdl $1, %eax, %esi +; X86-NEXT: shrdl $1, %eax, %edx ; X86-NEXT: movl (%esp), %eax # 4-byte Reload ; X86-NEXT: shrdl $1, %eax, %edi ; X86-NEXT: shrdl $1, %ebx, %ebp +; 
X86-NEXT: shrdl $1, %esi, %ecx ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl %ebp, 12(%eax) -; X86-NEXT: movl %edi, 8(%eax) -; X86-NEXT: movl %esi, 4(%eax) -; X86-NEXT: movl %ecx, (%eax) -; X86-NEXT: addl $16, %esp +; X86-NEXT: movl %ecx, 12(%eax) +; X86-NEXT: movl %ebp, 8(%eax) +; X86-NEXT: movl %edi, 4(%eax) +; X86-NEXT: movl %edx, (%eax) +; X86-NEXT: addl $12, %esp ; X86-NEXT: popl %esi ; X86-NEXT: popl %edi ; X86-NEXT: popl %ebx