[DAGCombiner] Improve X div/rem Y fold for single-bit element types

Summary: Tests by @spatel, thanks

Reviewers: spatel, RKSimon

Reviewed By: spatel

Subscribers: sdardis, atanasyan, llvm-commits, spatel

Differential Revision: https://reviews.llvm.org/D52668

llvm-svn: 345575
This commit is contained in:
David Bolvansky 2018-10-30 09:07:22 +00:00
parent da78171643
commit dfdbb038e8
10 changed files with 62 additions and 940 deletions

View File

@ -3138,11 +3138,12 @@ static SDValue simplifyDivRem(SDNode *N, SelectionDAG &DAG) {
// X / 1 -> X
// X % 1 -> 0
if (N1C && N1C->isOne())
return IsDiv ? N0 : DAG.getConstant(0, DL, VT);
// If this is a boolean op (single-bit element type), we can't have
// division-by-zero or remainder-by-zero, so assume the divisor is 1.
// Similarly, if we're zero-extending a boolean divisor, then assume it's a 1.
// TODO: Similarly, if we're zero-extending a boolean divisor, then assume
// it's a 1.
if ((N1C && N1C->isOne()) || (VT.getScalarType() == MVT::i1))
return IsDiv ? N0 : DAG.getConstant(0, DL, VT);
return SDValue();
}

View File

@ -35,55 +35,32 @@
define signext i1 @sdiv_i1(i1 signext %a, i1 signext %b) {
; GP32-LABEL: sdiv_i1:
; GP32: # %bb.0: # %entry
; GP32-NEXT: div $zero, $4, $5
; GP32-NEXT: teq $5, $zero, 7
; GP32-NEXT: mflo $1
; GP32-NEXT: andi $1, $1, 1
; GP32-NEXT: jr $ra
; GP32-NEXT: negu $2, $1
; GP32-NEXT: move $2, $4
;
; GP32R6-LABEL: sdiv_i1:
; GP32R6: # %bb.0: # %entry
; GP32R6-NEXT: div $1, $4, $5
; GP32R6-NEXT: teq $5, $zero, 7
; GP32R6-NEXT: andi $1, $1, 1
; GP32R6-NEXT: jr $ra
; GP32R6-NEXT: negu $2, $1
; GP32R6-NEXT: move $2, $4
;
; GP64-LABEL: sdiv_i1:
; GP64: # %bb.0: # %entry
; GP64-NEXT: div $zero, $4, $5
; GP64-NEXT: teq $5, $zero, 7
; GP64-NEXT: mflo $1
; GP64-NEXT: andi $1, $1, 1
; GP64-NEXT: jr $ra
; GP64-NEXT: negu $2, $1
; GP64-NEXT: move $2, $4
;
; GP64R6-LABEL: sdiv_i1:
; GP64R6: # %bb.0: # %entry
; GP64R6-NEXT: div $1, $4, $5
; GP64R6-NEXT: teq $5, $zero, 7
; GP64R6-NEXT: andi $1, $1, 1
; GP64R6-NEXT: jr $ra
; GP64R6-NEXT: negu $2, $1
; GP64R6-NEXT: move $2, $4
;
; MMR3-LABEL: sdiv_i1:
; MMR3: # %bb.0: # %entry
; MMR3-NEXT: div $zero, $4, $5
; MMR3-NEXT: teq $5, $zero, 7
; MMR3-NEXT: mflo16 $2
; MMR3-NEXT: andi16 $2, $2, 1
; MMR3-NEXT: li16 $3, 0
; MMR3-NEXT: subu16 $2, $3, $2
; MMR3-NEXT: move $2, $4
; MMR3-NEXT: jrc $ra
;
; MMR6-LABEL: sdiv_i1:
; MMR6: # %bb.0: # %entry
; MMR6-NEXT: div $2, $4, $5
; MMR6-NEXT: teq $5, $zero, 7
; MMR6-NEXT: andi16 $2, $2, 1
; MMR6-NEXT: li16 $3, 0
; MMR6-NEXT: subu16 $2, $3, $2
; MMR6-NEXT: move $2, $4
; MMR6-NEXT: jrc $ra
entry:
%r = sdiv i1 %a, %b

View File

@ -35,55 +35,32 @@
define signext i1 @srem_i1(i1 signext %a, i1 signext %b) {
; GP32-LABEL: srem_i1:
; GP32: # %bb.0: # %entry
; GP32-NEXT: div $zero, $4, $5
; GP32-NEXT: teq $5, $zero, 7
; GP32-NEXT: mfhi $1
; GP32-NEXT: andi $1, $1, 1
; GP32-NEXT: jr $ra
; GP32-NEXT: negu $2, $1
; GP32-NEXT: addiu $2, $zero, 0
;
; GP32R6-LABEL: srem_i1:
; GP32R6: # %bb.0: # %entry
; GP32R6-NEXT: mod $1, $4, $5
; GP32R6-NEXT: teq $5, $zero, 7
; GP32R6-NEXT: andi $1, $1, 1
; GP32R6-NEXT: jr $ra
; GP32R6-NEXT: negu $2, $1
; GP32R6-NEXT: addiu $2, $zero, 0
;
; GP64-LABEL: srem_i1:
; GP64: # %bb.0: # %entry
; GP64-NEXT: div $zero, $4, $5
; GP64-NEXT: teq $5, $zero, 7
; GP64-NEXT: mfhi $1
; GP64-NEXT: andi $1, $1, 1
; GP64-NEXT: jr $ra
; GP64-NEXT: negu $2, $1
; GP64-NEXT: addiu $2, $zero, 0
;
; GP64R6-LABEL: srem_i1:
; GP64R6: # %bb.0: # %entry
; GP64R6-NEXT: mod $1, $4, $5
; GP64R6-NEXT: teq $5, $zero, 7
; GP64R6-NEXT: andi $1, $1, 1
; GP64R6-NEXT: jr $ra
; GP64R6-NEXT: negu $2, $1
; GP64R6-NEXT: addiu $2, $zero, 0
;
; MMR3-LABEL: srem_i1:
; MMR3: # %bb.0: # %entry
; MMR3-NEXT: div $zero, $4, $5
; MMR3-NEXT: teq $5, $zero, 7
; MMR3-NEXT: mfhi16 $2
; MMR3-NEXT: andi16 $2, $2, 1
; MMR3-NEXT: li16 $3, 0
; MMR3-NEXT: subu16 $2, $3, $2
; MMR3-NEXT: li16 $2, 0
; MMR3-NEXT: jrc $ra
;
; MMR6-LABEL: srem_i1:
; MMR6: # %bb.0: # %entry
; MMR6-NEXT: mod $2, $4, $5
; MMR6-NEXT: teq $5, $zero, 7
; MMR6-NEXT: andi16 $2, $2, 1
; MMR6-NEXT: li16 $3, 0
; MMR6-NEXT: subu16 $2, $3, $2
; MMR6-NEXT: li16 $2, 0
; MMR6-NEXT: jrc $ra
entry:
%r = srem i1 %a, %b

View File

@ -35,41 +35,32 @@
define zeroext i1 @udiv_i1(i1 zeroext %a, i1 zeroext %b) {
; GP32-LABEL: udiv_i1:
; GP32: # %bb.0: # %entry
; GP32-NEXT: divu $zero, $4, $5
; GP32-NEXT: teq $5, $zero, 7
; GP32-NEXT: jr $ra
; GP32-NEXT: mflo $2
; GP32-NEXT: move $2, $4
;
; GP32R6-LABEL: udiv_i1:
; GP32R6: # %bb.0: # %entry
; GP32R6-NEXT: divu $2, $4, $5
; GP32R6-NEXT: teq $5, $zero, 7
; GP32R6-NEXT: jrc $ra
; GP32R6-NEXT: jr $ra
; GP32R6-NEXT: move $2, $4
;
; GP64-LABEL: udiv_i1:
; GP64: # %bb.0: # %entry
; GP64-NEXT: divu $zero, $4, $5
; GP64-NEXT: teq $5, $zero, 7
; GP64-NEXT: jr $ra
; GP64-NEXT: mflo $2
; GP64-NEXT: move $2, $4
;
; GP64R6-LABEL: udiv_i1:
; GP64R6: # %bb.0: # %entry
; GP64R6-NEXT: divu $2, $4, $5
; GP64R6-NEXT: teq $5, $zero, 7
; GP64R6-NEXT: jrc $ra
; GP64R6-NEXT: jr $ra
; GP64R6-NEXT: move $2, $4
;
; MMR3-LABEL: udiv_i1:
; MMR3: # %bb.0: # %entry
; MMR3-NEXT: divu $zero, $4, $5
; MMR3-NEXT: teq $5, $zero, 7
; MMR3-NEXT: mflo16 $2
; MMR3-NEXT: move $2, $4
; MMR3-NEXT: jrc $ra
;
; MMR6-LABEL: udiv_i1:
; MMR6: # %bb.0: # %entry
; MMR6-NEXT: divu $2, $4, $5
; MMR6-NEXT: teq $5, $zero, 7
; MMR6-NEXT: move $2, $4
; MMR6-NEXT: jrc $ra
entry:
%r = udiv i1 %a, %b

View File

@ -35,64 +35,32 @@
define signext i1 @urem_i1(i1 signext %a, i1 signext %b) {
; GP32-LABEL: urem_i1:
; GP32: # %bb.0: # %entry
; GP32-NEXT: andi $1, $5, 1
; GP32-NEXT: andi $2, $4, 1
; GP32-NEXT: divu $zero, $2, $1
; GP32-NEXT: teq $1, $zero, 7
; GP32-NEXT: mfhi $1
; GP32-NEXT: andi $1, $1, 1
; GP32-NEXT: jr $ra
; GP32-NEXT: negu $2, $1
; GP32-NEXT: addiu $2, $zero, 0
;
; GP32R6-LABEL: urem_i1:
; GP32R6: # %bb.0: # %entry
; GP32R6-NEXT: andi $1, $5, 1
; GP32R6-NEXT: andi $2, $4, 1
; GP32R6-NEXT: modu $2, $2, $1
; GP32R6-NEXT: teq $1, $zero, 7
; GP32R6-NEXT: jr $ra
; GP32R6-NEXT: negu $2, $2
; GP32R6-NEXT: addiu $2, $zero, 0
;
; GP64-LABEL: urem_i1:
; GP64: # %bb.0: # %entry
; GP64-NEXT: andi $1, $5, 1
; GP64-NEXT: andi $2, $4, 1
; GP64-NEXT: divu $zero, $2, $1
; GP64-NEXT: teq $1, $zero, 7
; GP64-NEXT: mfhi $1
; GP64-NEXT: andi $1, $1, 1
; GP64-NEXT: jr $ra
; GP64-NEXT: negu $2, $1
; GP64-NEXT: addiu $2, $zero, 0
;
; GP64R6-LABEL: urem_i1:
; GP64R6: # %bb.0: # %entry
; GP64R6-NEXT: andi $1, $5, 1
; GP64R6-NEXT: andi $2, $4, 1
; GP64R6-NEXT: modu $2, $2, $1
; GP64R6-NEXT: teq $1, $zero, 7
; GP64R6-NEXT: jr $ra
; GP64R6-NEXT: negu $2, $2
; GP64R6-NEXT: addiu $2, $zero, 0
;
; MMR3-LABEL: urem_i1:
; MMR3: # %bb.0: # %entry
; MMR3-NEXT: andi16 $2, $5, 1
; MMR3-NEXT: andi16 $3, $4, 1
; MMR3-NEXT: divu $zero, $3, $2
; MMR3-NEXT: teq $2, $zero, 7
; MMR3-NEXT: mfhi16 $2
; MMR3-NEXT: andi16 $2, $2, 1
; MMR3-NEXT: li16 $3, 0
; MMR3-NEXT: subu16 $2, $3, $2
; MMR3-NEXT: li16 $2, 0
; MMR3-NEXT: jrc $ra
;
; MMR6-LABEL: urem_i1:
; MMR6: # %bb.0: # %entry
; MMR6-NEXT: andi16 $2, $5, 1
; MMR6-NEXT: andi16 $3, $4, 1
; MMR6-NEXT: modu $3, $3, $2
; MMR6-NEXT: teq $2, $zero, 7
; MMR6-NEXT: li16 $2, 0
; MMR6-NEXT: subu16 $2, $2, $3
; MMR6-NEXT: jrc $ra
entry:
%r = urem i1 %a, %b

View File

@ -3294,322 +3294,16 @@ define i1 @bool_sdiv(i1 %x, i1 %y) {
; CHECK-LABEL: bool_sdiv:
; CHECK: # %bb.0:
; CHECK-NEXT: movl %edi, %eax
; CHECK-NEXT: andb $1, %al
; CHECK-NEXT: negb %al
; CHECK-NEXT: # kill: def $al killed $al killed $eax
; CHECK-NEXT: cbtw
; CHECK-NEXT: andb $1, %sil
; CHECK-NEXT: negb %sil
; CHECK-NEXT: idivb %sil
; CHECK-NEXT: retq
%r = sdiv i1 %x, %y
ret i1 %r
}
define <4 x i1> @boolvec_sdiv(<4 x i1> %x, <4 x i1> %y) {
; SSE2-LABEL: boolvec_sdiv:
; SSE2: # %bb.0:
; SSE2-NEXT: pslld $31, %xmm1
; SSE2-NEXT: psrad $31, %xmm1
; SSE2-NEXT: pslld $31, %xmm0
; SSE2-NEXT: psrad $31, %xmm0
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[3,1,2,3]
; SSE2-NEXT: movd %xmm2, %eax
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[3,1,2,3]
; SSE2-NEXT: movd %xmm2, %ecx
; SSE2-NEXT: cltd
; SSE2-NEXT: idivl %ecx
; SSE2-NEXT: movd %eax, %xmm2
; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm0[2,3,0,1]
; SSE2-NEXT: movd %xmm3, %eax
; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[2,3,0,1]
; SSE2-NEXT: movd %xmm3, %ecx
; SSE2-NEXT: cltd
; SSE2-NEXT: idivl %ecx
; SSE2-NEXT: movd %eax, %xmm3
; SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
; SSE2-NEXT: movd %xmm0, %eax
; SSE2-NEXT: movd %xmm1, %ecx
; SSE2-NEXT: cltd
; SSE2-NEXT: idivl %ecx
; SSE2-NEXT: movd %eax, %xmm2
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
; SSE2-NEXT: movd %xmm0, %eax
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
; SSE2-NEXT: movd %xmm0, %ecx
; SSE2-NEXT: cltd
; SSE2-NEXT: idivl %ecx
; SSE2-NEXT: movd %eax, %xmm0
; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
; SSE2-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0]
; SSE2-NEXT: movdqa %xmm2, %xmm0
; SSE2-NEXT: retq
;
; SSE41-LABEL: boolvec_sdiv:
; SSE41: # %bb.0:
; SSE41-NEXT: pslld $31, %xmm1
; SSE41-NEXT: psrad $31, %xmm1
; SSE41-NEXT: pslld $31, %xmm0
; SSE41-NEXT: psrad $31, %xmm0
; SSE41-NEXT: pextrd $1, %xmm0, %eax
; SSE41-NEXT: pextrd $1, %xmm1, %ecx
; SSE41-NEXT: cltd
; SSE41-NEXT: idivl %ecx
; SSE41-NEXT: movl %eax, %ecx
; SSE41-NEXT: movd %xmm0, %eax
; SSE41-NEXT: movd %xmm1, %esi
; SSE41-NEXT: cltd
; SSE41-NEXT: idivl %esi
; SSE41-NEXT: movd %eax, %xmm2
; SSE41-NEXT: pinsrd $1, %ecx, %xmm2
; SSE41-NEXT: pextrd $2, %xmm0, %eax
; SSE41-NEXT: pextrd $2, %xmm1, %ecx
; SSE41-NEXT: cltd
; SSE41-NEXT: idivl %ecx
; SSE41-NEXT: pinsrd $2, %eax, %xmm2
; SSE41-NEXT: pextrd $3, %xmm0, %eax
; SSE41-NEXT: pextrd $3, %xmm1, %ecx
; SSE41-NEXT: cltd
; SSE41-NEXT: idivl %ecx
; SSE41-NEXT: pinsrd $3, %eax, %xmm2
; SSE41-NEXT: movdqa %xmm2, %xmm0
; SSE41-NEXT: retq
;
; AVX1-LABEL: boolvec_sdiv:
; AVX1: # %bb.0:
; AVX1-NEXT: vpslld $31, %xmm1, %xmm1
; AVX1-NEXT: vpsrad $31, %xmm1, %xmm1
; AVX1-NEXT: vpslld $31, %xmm0, %xmm0
; AVX1-NEXT: vpsrad $31, %xmm0, %xmm0
; AVX1-NEXT: vpextrd $1, %xmm0, %eax
; AVX1-NEXT: vpextrd $1, %xmm1, %ecx
; AVX1-NEXT: cltd
; AVX1-NEXT: idivl %ecx
; AVX1-NEXT: movl %eax, %ecx
; AVX1-NEXT: vmovd %xmm0, %eax
; AVX1-NEXT: vmovd %xmm1, %esi
; AVX1-NEXT: cltd
; AVX1-NEXT: idivl %esi
; AVX1-NEXT: vmovd %eax, %xmm2
; AVX1-NEXT: vpinsrd $1, %ecx, %xmm2, %xmm2
; AVX1-NEXT: vpextrd $2, %xmm0, %eax
; AVX1-NEXT: vpextrd $2, %xmm1, %ecx
; AVX1-NEXT: cltd
; AVX1-NEXT: idivl %ecx
; AVX1-NEXT: vpinsrd $2, %eax, %xmm2, %xmm2
; AVX1-NEXT: vpextrd $3, %xmm0, %eax
; AVX1-NEXT: vpextrd $3, %xmm1, %ecx
; AVX1-NEXT: cltd
; AVX1-NEXT: idivl %ecx
; AVX1-NEXT: vpinsrd $3, %eax, %xmm2, %xmm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: boolvec_sdiv:
; AVX2: # %bb.0:
; AVX2-NEXT: vpslld $31, %xmm1, %xmm1
; AVX2-NEXT: vpsrad $31, %xmm1, %xmm1
; AVX2-NEXT: vpslld $31, %xmm0, %xmm0
; AVX2-NEXT: vpsrad $31, %xmm0, %xmm0
; AVX2-NEXT: vpextrd $1, %xmm0, %eax
; AVX2-NEXT: vpextrd $1, %xmm1, %ecx
; AVX2-NEXT: cltd
; AVX2-NEXT: idivl %ecx
; AVX2-NEXT: movl %eax, %ecx
; AVX2-NEXT: vmovd %xmm0, %eax
; AVX2-NEXT: vmovd %xmm1, %esi
; AVX2-NEXT: cltd
; AVX2-NEXT: idivl %esi
; AVX2-NEXT: vmovd %eax, %xmm2
; AVX2-NEXT: vpinsrd $1, %ecx, %xmm2, %xmm2
; AVX2-NEXT: vpextrd $2, %xmm0, %eax
; AVX2-NEXT: vpextrd $2, %xmm1, %ecx
; AVX2-NEXT: cltd
; AVX2-NEXT: idivl %ecx
; AVX2-NEXT: vpinsrd $2, %eax, %xmm2, %xmm2
; AVX2-NEXT: vpextrd $3, %xmm0, %eax
; AVX2-NEXT: vpextrd $3, %xmm1, %ecx
; AVX2-NEXT: cltd
; AVX2-NEXT: idivl %ecx
; AVX2-NEXT: vpinsrd $3, %eax, %xmm2, %xmm0
; AVX2-NEXT: retq
;
; AVX512F-LABEL: boolvec_sdiv:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpslld $31, %xmm1, %xmm1
; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k3
; AVX512F-NEXT: kshiftrw $3, %k3, %k0
; AVX512F-NEXT: vpslld $31, %xmm0, %xmm0
; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k4
; AVX512F-NEXT: kshiftrw $3, %k4, %k1
; AVX512F-NEXT: kshiftrw $2, %k3, %k2
; AVX512F-NEXT: kshiftrw $2, %k4, %k5
; AVX512F-NEXT: kmovw %k5, %ecx
; AVX512F-NEXT: kshiftrw $1, %k3, %k5
; AVX512F-NEXT: kmovw %k3, %edi
; AVX512F-NEXT: kshiftrw $1, %k4, %k3
; AVX512F-NEXT: kmovw %k4, %esi
; AVX512F-NEXT: kmovw %k5, %edx
; AVX512F-NEXT: kmovw %k3, %eax
; AVX512F-NEXT: andb $1, %al
; AVX512F-NEXT: negb %al
; AVX512F-NEXT: # kill: def $al killed $al killed $eax
; AVX512F-NEXT: cbtw
; AVX512F-NEXT: andb $1, %dl
; AVX512F-NEXT: negb %dl
; AVX512F-NEXT: idivb %dl
; AVX512F-NEXT: movl %eax, %edx
; AVX512F-NEXT: andb $1, %sil
; AVX512F-NEXT: negb %sil
; AVX512F-NEXT: movl %esi, %eax
; AVX512F-NEXT: cbtw
; AVX512F-NEXT: andb $1, %dil
; AVX512F-NEXT: negb %dil
; AVX512F-NEXT: idivb %dil
; AVX512F-NEXT: movl %eax, %esi
; AVX512F-NEXT: andb $1, %cl
; AVX512F-NEXT: negb %cl
; AVX512F-NEXT: movl %ecx, %eax
; AVX512F-NEXT: cbtw
; AVX512F-NEXT: kmovw %k2, %ecx
; AVX512F-NEXT: andb $1, %cl
; AVX512F-NEXT: negb %cl
; AVX512F-NEXT: idivb %cl
; AVX512F-NEXT: movl %eax, %ecx
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: andb $1, %al
; AVX512F-NEXT: negb %al
; AVX512F-NEXT: # kill: def $al killed $al killed $eax
; AVX512F-NEXT: cbtw
; AVX512F-NEXT: kmovw %k0, %edi
; AVX512F-NEXT: andb $1, %dil
; AVX512F-NEXT: negb %dil
; AVX512F-NEXT: idivb %dil
; AVX512F-NEXT: # kill: def $al killed $al def $eax
; AVX512F-NEXT: kmovw %edx, %k0
; AVX512F-NEXT: kmovw %esi, %k1
; AVX512F-NEXT: kshiftrw $1, %k1, %k2
; AVX512F-NEXT: kxorw %k0, %k2, %k0
; AVX512F-NEXT: kshiftlw $15, %k0, %k0
; AVX512F-NEXT: kshiftrw $14, %k0, %k0
; AVX512F-NEXT: kxorw %k0, %k1, %k0
; AVX512F-NEXT: kshiftrw $2, %k0, %k1
; AVX512F-NEXT: kmovw %ecx, %k2
; AVX512F-NEXT: kxorw %k2, %k1, %k1
; AVX512F-NEXT: kshiftlw $15, %k1, %k1
; AVX512F-NEXT: kshiftrw $13, %k1, %k1
; AVX512F-NEXT: kxorw %k1, %k0, %k0
; AVX512F-NEXT: kshiftlw $13, %k0, %k0
; AVX512F-NEXT: kshiftrw $13, %k0, %k0
; AVX512F-NEXT: kmovw %eax, %k1
; AVX512F-NEXT: kshiftlw $3, %k1, %k1
; AVX512F-NEXT: korw %k1, %k0, %k1
; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: boolvec_sdiv:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpslld $31, %xmm1, %xmm1
; AVX512BW-NEXT: vptestmd %xmm1, %xmm1, %k3
; AVX512BW-NEXT: kshiftrw $3, %k3, %k0
; AVX512BW-NEXT: vpslld $31, %xmm0, %xmm0
; AVX512BW-NEXT: vptestmd %xmm0, %xmm0, %k4
; AVX512BW-NEXT: kshiftrw $3, %k4, %k1
; AVX512BW-NEXT: kshiftrw $2, %k3, %k2
; AVX512BW-NEXT: kshiftrw $2, %k4, %k5
; AVX512BW-NEXT: kmovd %k5, %ecx
; AVX512BW-NEXT: kshiftrw $1, %k3, %k5
; AVX512BW-NEXT: kmovd %k3, %edi
; AVX512BW-NEXT: kshiftrw $1, %k4, %k3
; AVX512BW-NEXT: kmovd %k4, %esi
; AVX512BW-NEXT: kmovd %k5, %edx
; AVX512BW-NEXT: kmovd %k3, %eax
; AVX512BW-NEXT: andb $1, %al
; AVX512BW-NEXT: negb %al
; AVX512BW-NEXT: # kill: def $al killed $al killed $eax
; AVX512BW-NEXT: cbtw
; AVX512BW-NEXT: andb $1, %dl
; AVX512BW-NEXT: negb %dl
; AVX512BW-NEXT: idivb %dl
; AVX512BW-NEXT: movl %eax, %edx
; AVX512BW-NEXT: andb $1, %sil
; AVX512BW-NEXT: negb %sil
; AVX512BW-NEXT: movl %esi, %eax
; AVX512BW-NEXT: cbtw
; AVX512BW-NEXT: andb $1, %dil
; AVX512BW-NEXT: negb %dil
; AVX512BW-NEXT: idivb %dil
; AVX512BW-NEXT: movl %eax, %esi
; AVX512BW-NEXT: andb $1, %cl
; AVX512BW-NEXT: negb %cl
; AVX512BW-NEXT: movl %ecx, %eax
; AVX512BW-NEXT: cbtw
; AVX512BW-NEXT: kmovd %k2, %ecx
; AVX512BW-NEXT: andb $1, %cl
; AVX512BW-NEXT: negb %cl
; AVX512BW-NEXT: idivb %cl
; AVX512BW-NEXT: movl %eax, %ecx
; AVX512BW-NEXT: kmovd %k1, %eax
; AVX512BW-NEXT: andb $1, %al
; AVX512BW-NEXT: negb %al
; AVX512BW-NEXT: # kill: def $al killed $al killed $eax
; AVX512BW-NEXT: cbtw
; AVX512BW-NEXT: kmovd %k0, %edi
; AVX512BW-NEXT: andb $1, %dil
; AVX512BW-NEXT: negb %dil
; AVX512BW-NEXT: idivb %dil
; AVX512BW-NEXT: # kill: def $al killed $al def $eax
; AVX512BW-NEXT: kmovd %edx, %k0
; AVX512BW-NEXT: kmovd %esi, %k1
; AVX512BW-NEXT: kshiftrw $1, %k1, %k2
; AVX512BW-NEXT: kxorw %k0, %k2, %k0
; AVX512BW-NEXT: kshiftlw $15, %k0, %k0
; AVX512BW-NEXT: kshiftrw $14, %k0, %k0
; AVX512BW-NEXT: kxorw %k0, %k1, %k0
; AVX512BW-NEXT: kshiftrw $2, %k0, %k1
; AVX512BW-NEXT: kmovd %ecx, %k2
; AVX512BW-NEXT: kxorw %k2, %k1, %k1
; AVX512BW-NEXT: kshiftlw $15, %k1, %k1
; AVX512BW-NEXT: kshiftrw $13, %k1, %k1
; AVX512BW-NEXT: kxorw %k1, %k0, %k0
; AVX512BW-NEXT: kshiftlw $13, %k0, %k0
; AVX512BW-NEXT: kshiftrw $13, %k0, %k0
; AVX512BW-NEXT: kmovd %eax, %k1
; AVX512BW-NEXT: kshiftlw $3, %k1, %k1
; AVX512BW-NEXT: korw %k1, %k0, %k1
; AVX512BW-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512BW-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
; AVX512BW-NEXT: retq
;
; XOP-LABEL: boolvec_sdiv:
; XOP: # %bb.0:
; XOP-NEXT: vpslld $31, %xmm1, %xmm1
; XOP-NEXT: vpsrad $31, %xmm1, %xmm1
; XOP-NEXT: vpslld $31, %xmm0, %xmm0
; XOP-NEXT: vpsrad $31, %xmm0, %xmm0
; XOP-NEXT: vpextrd $1, %xmm0, %eax
; XOP-NEXT: vpextrd $1, %xmm1, %ecx
; XOP-NEXT: cltd
; XOP-NEXT: idivl %ecx
; XOP-NEXT: movl %eax, %ecx
; XOP-NEXT: vmovd %xmm0, %eax
; XOP-NEXT: vmovd %xmm1, %esi
; XOP-NEXT: cltd
; XOP-NEXT: idivl %esi
; XOP-NEXT: vmovd %eax, %xmm2
; XOP-NEXT: vpinsrd $1, %ecx, %xmm2, %xmm2
; XOP-NEXT: vpextrd $2, %xmm0, %eax
; XOP-NEXT: vpextrd $2, %xmm1, %ecx
; XOP-NEXT: cltd
; XOP-NEXT: idivl %ecx
; XOP-NEXT: vpinsrd $2, %eax, %xmm2, %xmm2
; XOP-NEXT: vpextrd $3, %xmm0, %eax
; XOP-NEXT: vpextrd $3, %xmm1, %ecx
; XOP-NEXT: cltd
; XOP-NEXT: idivl %ecx
; XOP-NEXT: vpinsrd $3, %eax, %xmm2, %xmm0
; XOP-NEXT: retq
; CHECK-LABEL: boolvec_sdiv:
; CHECK: # %bb.0:
; CHECK-NEXT: retq
%r = sdiv <4 x i1> %x, %y
ret <4 x i1> %r
}

View File

@ -462,16 +462,7 @@ define i32 @ossfuzz6883() {
define i1 @bool_srem(i1 %x, i1 %y) {
; CHECK-LABEL: bool_srem:
; CHECK: # %bb.0:
; CHECK-NEXT: movl %edi, %eax
; CHECK-NEXT: andb $1, %al
; CHECK-NEXT: negb %al
; CHECK-NEXT: # kill: def $al killed $al killed $eax
; CHECK-NEXT: cbtw
; CHECK-NEXT: andb $1, %sil
; CHECK-NEXT: negb %sil
; CHECK-NEXT: idivb %sil
; CHECK-NEXT: movsbl %ah, %eax
; CHECK-NEXT: # kill: def $al killed $al killed $eax
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: retq
%r = srem i1 %x, %y
ret i1 %r
@ -479,61 +470,12 @@ define i1 @bool_srem(i1 %x, i1 %y) {
define <4 x i1> @boolvec_srem(<4 x i1> %x, <4 x i1> %y) {
; SSE-LABEL: boolvec_srem:
; SSE: # %bb.0:
; SSE-NEXT: pslld $31, %xmm1
; SSE-NEXT: psrad $31, %xmm1
; SSE-NEXT: pslld $31, %xmm0
; SSE-NEXT: psrad $31, %xmm0
; SSE-NEXT: pextrd $1, %xmm0, %eax
; SSE-NEXT: pextrd $1, %xmm1, %ecx
; SSE-NEXT: cltd
; SSE-NEXT: idivl %ecx
; SSE-NEXT: movl %edx, %ecx
; SSE-NEXT: movd %xmm0, %eax
; SSE-NEXT: movd %xmm1, %esi
; SSE-NEXT: cltd
; SSE-NEXT: idivl %esi
; SSE-NEXT: movd %edx, %xmm2
; SSE-NEXT: pinsrd $1, %ecx, %xmm2
; SSE-NEXT: pextrd $2, %xmm0, %eax
; SSE-NEXT: pextrd $2, %xmm1, %ecx
; SSE-NEXT: cltd
; SSE-NEXT: idivl %ecx
; SSE-NEXT: pinsrd $2, %edx, %xmm2
; SSE-NEXT: pextrd $3, %xmm0, %eax
; SSE-NEXT: pextrd $3, %xmm1, %ecx
; SSE-NEXT: cltd
; SSE-NEXT: idivl %ecx
; SSE-NEXT: pinsrd $3, %edx, %xmm2
; SSE-NEXT: movdqa %xmm2, %xmm0
; SSE-NEXT: xorps %xmm0, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: boolvec_srem:
; AVX: # %bb.0:
; AVX-NEXT: vpslld $31, %xmm1, %xmm1
; AVX-NEXT: vpsrad $31, %xmm1, %xmm1
; AVX-NEXT: vpslld $31, %xmm0, %xmm0
; AVX-NEXT: vpsrad $31, %xmm0, %xmm0
; AVX-NEXT: vpextrd $1, %xmm0, %eax
; AVX-NEXT: vpextrd $1, %xmm1, %ecx
; AVX-NEXT: cltd
; AVX-NEXT: idivl %ecx
; AVX-NEXT: movl %edx, %ecx
; AVX-NEXT: vmovd %xmm0, %eax
; AVX-NEXT: vmovd %xmm1, %esi
; AVX-NEXT: cltd
; AVX-NEXT: idivl %esi
; AVX-NEXT: vmovd %edx, %xmm2
; AVX-NEXT: vpinsrd $1, %ecx, %xmm2, %xmm2
; AVX-NEXT: vpextrd $2, %xmm0, %eax
; AVX-NEXT: vpextrd $2, %xmm1, %ecx
; AVX-NEXT: cltd
; AVX-NEXT: idivl %ecx
; AVX-NEXT: vpinsrd $2, %edx, %xmm2, %xmm2
; AVX-NEXT: vpextrd $3, %xmm0, %eax
; AVX-NEXT: vpextrd $3, %xmm1, %ecx
; AVX-NEXT: cltd
; AVX-NEXT: idivl %ecx
; AVX-NEXT: vpinsrd $3, %edx, %xmm2, %xmm0
; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0
; AVX-NEXT: retq
%r = srem <4 x i1> %x, %y
ret <4 x i1> %r

View File

@ -911,166 +911,17 @@ define <8 x i16> @pr38477(<8 x i16> %a0) {
define i1 @bool_udiv(i1 %x, i1 %y) {
; CHECK-LABEL: bool_udiv:
; CHECK: # %bb.0:
; CHECK-NEXT: andb $1, %sil
; CHECK-NEXT: andb $1, %dil
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: # kill: def $eax killed $eax def $ax
; CHECK-NEXT: divb %sil
; CHECK-NEXT: movl %edi, %eax
; CHECK-NEXT: # kill: def $al killed $al killed $eax
; CHECK-NEXT: retq
%r = udiv i1 %x, %y
ret i1 %r
}
define <4 x i1> @boolvec_udiv(<4 x i1> %x, <4 x i1> %y) {
; SSE2-LABEL: boolvec_udiv:
; SSE2: # %bb.0:
; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [1,1,1,1]
; SSE2-NEXT: pand %xmm2, %xmm1
; SSE2-NEXT: pand %xmm2, %xmm0
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[3,1,2,3]
; SSE2-NEXT: movd %xmm2, %eax
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[3,1,2,3]
; SSE2-NEXT: movd %xmm2, %ecx
; SSE2-NEXT: xorl %edx, %edx
; SSE2-NEXT: divl %ecx
; SSE2-NEXT: movd %eax, %xmm2
; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm0[2,3,0,1]
; SSE2-NEXT: movd %xmm3, %eax
; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[2,3,0,1]
; SSE2-NEXT: movd %xmm3, %ecx
; SSE2-NEXT: xorl %edx, %edx
; SSE2-NEXT: divl %ecx
; SSE2-NEXT: movd %eax, %xmm3
; SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
; SSE2-NEXT: movd %xmm0, %eax
; SSE2-NEXT: movd %xmm1, %ecx
; SSE2-NEXT: xorl %edx, %edx
; SSE2-NEXT: divl %ecx
; SSE2-NEXT: movd %eax, %xmm2
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
; SSE2-NEXT: movd %xmm0, %eax
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
; SSE2-NEXT: movd %xmm0, %ecx
; SSE2-NEXT: xorl %edx, %edx
; SSE2-NEXT: divl %ecx
; SSE2-NEXT: movd %eax, %xmm0
; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
; SSE2-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0]
; SSE2-NEXT: movdqa %xmm2, %xmm0
; SSE2-NEXT: retq
;
; SSE41-LABEL: boolvec_udiv:
; SSE41: # %bb.0:
; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [1,1,1,1]
; SSE41-NEXT: pand %xmm2, %xmm1
; SSE41-NEXT: pand %xmm2, %xmm0
; SSE41-NEXT: pextrd $1, %xmm0, %eax
; SSE41-NEXT: pextrd $1, %xmm1, %ecx
; SSE41-NEXT: xorl %edx, %edx
; SSE41-NEXT: divl %ecx
; SSE41-NEXT: movl %eax, %ecx
; SSE41-NEXT: movd %xmm0, %eax
; SSE41-NEXT: movd %xmm1, %esi
; SSE41-NEXT: xorl %edx, %edx
; SSE41-NEXT: divl %esi
; SSE41-NEXT: movd %eax, %xmm2
; SSE41-NEXT: pinsrd $1, %ecx, %xmm2
; SSE41-NEXT: pextrd $2, %xmm0, %eax
; SSE41-NEXT: pextrd $2, %xmm1, %ecx
; SSE41-NEXT: xorl %edx, %edx
; SSE41-NEXT: divl %ecx
; SSE41-NEXT: pinsrd $2, %eax, %xmm2
; SSE41-NEXT: pextrd $3, %xmm0, %eax
; SSE41-NEXT: pextrd $3, %xmm1, %ecx
; SSE41-NEXT: xorl %edx, %edx
; SSE41-NEXT: divl %ecx
; SSE41-NEXT: pinsrd $3, %eax, %xmm2
; SSE41-NEXT: movdqa %xmm2, %xmm0
; SSE41-NEXT: retq
;
; AVX1-LABEL: boolvec_udiv:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [1,1,1,1]
; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpextrd $1, %xmm0, %eax
; AVX1-NEXT: vpextrd $1, %xmm1, %ecx
; AVX1-NEXT: xorl %edx, %edx
; AVX1-NEXT: divl %ecx
; AVX1-NEXT: movl %eax, %ecx
; AVX1-NEXT: vmovd %xmm0, %eax
; AVX1-NEXT: vmovd %xmm1, %esi
; AVX1-NEXT: xorl %edx, %edx
; AVX1-NEXT: divl %esi
; AVX1-NEXT: vmovd %eax, %xmm2
; AVX1-NEXT: vpinsrd $1, %ecx, %xmm2, %xmm2
; AVX1-NEXT: vpextrd $2, %xmm0, %eax
; AVX1-NEXT: vpextrd $2, %xmm1, %ecx
; AVX1-NEXT: xorl %edx, %edx
; AVX1-NEXT: divl %ecx
; AVX1-NEXT: vpinsrd $2, %eax, %xmm2, %xmm2
; AVX1-NEXT: vpextrd $3, %xmm0, %eax
; AVX1-NEXT: vpextrd $3, %xmm1, %ecx
; AVX1-NEXT: xorl %edx, %edx
; AVX1-NEXT: divl %ecx
; AVX1-NEXT: vpinsrd $3, %eax, %xmm2, %xmm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: boolvec_udiv:
; AVX2: # %bb.0:
; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [1,1,1,1]
; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
; AVX2-NEXT: vpand %xmm2, %xmm0, %xmm0
; AVX2-NEXT: vpextrd $1, %xmm0, %eax
; AVX2-NEXT: vpextrd $1, %xmm1, %ecx
; AVX2-NEXT: xorl %edx, %edx
; AVX2-NEXT: divl %ecx
; AVX2-NEXT: movl %eax, %ecx
; AVX2-NEXT: vmovd %xmm0, %eax
; AVX2-NEXT: vmovd %xmm1, %esi
; AVX2-NEXT: xorl %edx, %edx
; AVX2-NEXT: divl %esi
; AVX2-NEXT: vmovd %eax, %xmm2
; AVX2-NEXT: vpinsrd $1, %ecx, %xmm2, %xmm2
; AVX2-NEXT: vpextrd $2, %xmm0, %eax
; AVX2-NEXT: vpextrd $2, %xmm1, %ecx
; AVX2-NEXT: xorl %edx, %edx
; AVX2-NEXT: divl %ecx
; AVX2-NEXT: vpinsrd $2, %eax, %xmm2, %xmm2
; AVX2-NEXT: vpextrd $3, %xmm0, %eax
; AVX2-NEXT: vpextrd $3, %xmm1, %ecx
; AVX2-NEXT: xorl %edx, %edx
; AVX2-NEXT: divl %ecx
; AVX2-NEXT: vpinsrd $3, %eax, %xmm2, %xmm0
; AVX2-NEXT: retq
;
; XOP-LABEL: boolvec_udiv:
; XOP: # %bb.0:
; XOP-NEXT: vmovdqa {{.*#+}} xmm2 = [1,1,1,1]
; XOP-NEXT: vpand %xmm2, %xmm1, %xmm1
; XOP-NEXT: vpand %xmm2, %xmm0, %xmm0
; XOP-NEXT: vpextrd $1, %xmm0, %eax
; XOP-NEXT: vpextrd $1, %xmm1, %ecx
; XOP-NEXT: xorl %edx, %edx
; XOP-NEXT: divl %ecx
; XOP-NEXT: movl %eax, %ecx
; XOP-NEXT: vmovd %xmm0, %eax
; XOP-NEXT: vmovd %xmm1, %esi
; XOP-NEXT: xorl %edx, %edx
; XOP-NEXT: divl %esi
; XOP-NEXT: vmovd %eax, %xmm2
; XOP-NEXT: vpinsrd $1, %ecx, %xmm2, %xmm2
; XOP-NEXT: vpextrd $2, %xmm0, %eax
; XOP-NEXT: vpextrd $2, %xmm1, %ecx
; XOP-NEXT: xorl %edx, %edx
; XOP-NEXT: divl %ecx
; XOP-NEXT: vpinsrd $2, %eax, %xmm2, %xmm2
; XOP-NEXT: vpextrd $3, %xmm0, %eax
; XOP-NEXT: vpextrd $3, %xmm1, %ecx
; XOP-NEXT: xorl %edx, %edx
; XOP-NEXT: divl %ecx
; XOP-NEXT: vpinsrd $3, %eax, %xmm2, %xmm0
; XOP-NEXT: retq
; CHECK-LABEL: boolvec_udiv:
; CHECK: # %bb.0:
; CHECK-NEXT: retq
%r = udiv <4 x i1> %x, %y
ret <4 x i1> %r
}

View File

@ -383,13 +383,7 @@ define <4 x i32> @combine_vec_urem_by_shl_pow2b(<4 x i32> %x, <4 x i32> %y) {
define i1 @bool_urem(i1 %x, i1 %y) {
; CHECK-LABEL: bool_urem:
; CHECK: # %bb.0:
; CHECK-NEXT: andb $1, %sil
; CHECK-NEXT: andb $1, %dil
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: # kill: def $eax killed $eax def $ax
; CHECK-NEXT: divb %sil
; CHECK-NEXT: movzbl %ah, %eax
; CHECK-NEXT: # kill: def $al killed $al killed $eax
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: retq
%r = urem i1 %x, %y
ret i1 %r
@ -398,88 +392,13 @@ define i1 @bool_urem(i1 %x, i1 %y) {
define <4 x i1> @boolvec_urem(<4 x i1> %x, <4 x i1> %y) {
; SSE-LABEL: boolvec_urem:
; SSE: # %bb.0:
; SSE-NEXT: movdqa {{.*#+}} xmm2 = [1,1,1,1]
; SSE-NEXT: pand %xmm2, %xmm1
; SSE-NEXT: pand %xmm2, %xmm0
; SSE-NEXT: pextrd $1, %xmm0, %eax
; SSE-NEXT: pextrd $1, %xmm1, %ecx
; SSE-NEXT: xorl %edx, %edx
; SSE-NEXT: divl %ecx
; SSE-NEXT: movl %edx, %ecx
; SSE-NEXT: movd %xmm0, %eax
; SSE-NEXT: movd %xmm1, %esi
; SSE-NEXT: xorl %edx, %edx
; SSE-NEXT: divl %esi
; SSE-NEXT: movd %edx, %xmm2
; SSE-NEXT: pinsrd $1, %ecx, %xmm2
; SSE-NEXT: pextrd $2, %xmm0, %eax
; SSE-NEXT: pextrd $2, %xmm1, %ecx
; SSE-NEXT: xorl %edx, %edx
; SSE-NEXT: divl %ecx
; SSE-NEXT: pinsrd $2, %edx, %xmm2
; SSE-NEXT: pextrd $3, %xmm0, %eax
; SSE-NEXT: pextrd $3, %xmm1, %ecx
; SSE-NEXT: xorl %edx, %edx
; SSE-NEXT: divl %ecx
; SSE-NEXT: pinsrd $3, %edx, %xmm2
; SSE-NEXT: movdqa %xmm2, %xmm0
; SSE-NEXT: xorps %xmm0, %xmm0
; SSE-NEXT: retq
;
; AVX1-LABEL: boolvec_urem:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [1,1,1,1]
; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpextrd $1, %xmm0, %eax
; AVX1-NEXT: vpextrd $1, %xmm1, %ecx
; AVX1-NEXT: xorl %edx, %edx
; AVX1-NEXT: divl %ecx
; AVX1-NEXT: movl %edx, %ecx
; AVX1-NEXT: vmovd %xmm0, %eax
; AVX1-NEXT: vmovd %xmm1, %esi
; AVX1-NEXT: xorl %edx, %edx
; AVX1-NEXT: divl %esi
; AVX1-NEXT: vmovd %edx, %xmm2
; AVX1-NEXT: vpinsrd $1, %ecx, %xmm2, %xmm2
; AVX1-NEXT: vpextrd $2, %xmm0, %eax
; AVX1-NEXT: vpextrd $2, %xmm1, %ecx
; AVX1-NEXT: xorl %edx, %edx
; AVX1-NEXT: divl %ecx
; AVX1-NEXT: vpinsrd $2, %edx, %xmm2, %xmm2
; AVX1-NEXT: vpextrd $3, %xmm0, %eax
; AVX1-NEXT: vpextrd $3, %xmm1, %ecx
; AVX1-NEXT: xorl %edx, %edx
; AVX1-NEXT: divl %ecx
; AVX1-NEXT: vpinsrd $3, %edx, %xmm2, %xmm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: boolvec_urem:
; AVX2: # %bb.0:
; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [1,1,1,1]
; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
; AVX2-NEXT: vpand %xmm2, %xmm0, %xmm0
; AVX2-NEXT: vpextrd $1, %xmm0, %eax
; AVX2-NEXT: vpextrd $1, %xmm1, %ecx
; AVX2-NEXT: xorl %edx, %edx
; AVX2-NEXT: divl %ecx
; AVX2-NEXT: movl %edx, %ecx
; AVX2-NEXT: vmovd %xmm0, %eax
; AVX2-NEXT: vmovd %xmm1, %esi
; AVX2-NEXT: xorl %edx, %edx
; AVX2-NEXT: divl %esi
; AVX2-NEXT: vmovd %edx, %xmm2
; AVX2-NEXT: vpinsrd $1, %ecx, %xmm2, %xmm2
; AVX2-NEXT: vpextrd $2, %xmm0, %eax
; AVX2-NEXT: vpextrd $2, %xmm1, %ecx
; AVX2-NEXT: xorl %edx, %edx
; AVX2-NEXT: divl %ecx
; AVX2-NEXT: vpinsrd $2, %edx, %xmm2, %xmm2
; AVX2-NEXT: vpextrd $3, %xmm0, %eax
; AVX2-NEXT: vpextrd $3, %xmm1, %ecx
; AVX2-NEXT: xorl %edx, %edx
; AVX2-NEXT: divl %ecx
; AVX2-NEXT: vpinsrd $3, %edx, %xmm2, %xmm0
; AVX2-NEXT: retq
; AVX-LABEL: boolvec_urem:
; AVX: # %bb.0:
; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0
; AVX-NEXT: retq
%r = urem <4 x i1> %x, %y
ret <4 x i1> %r
}

View File

@ -6,68 +6,13 @@
define void @f() {
; X64-LABEL: f:
; X64: # %bb.0: # %BB
; X64-NEXT: pushq %rbp
; X64-NEXT: .cfi_def_cfa_offset 16
; X64-NEXT: pushq %r14
; X64-NEXT: .cfi_def_cfa_offset 24
; X64-NEXT: pushq %rbx
; X64-NEXT: .cfi_def_cfa_offset 32
; X64-NEXT: subq $16, %rsp
; X64-NEXT: .cfi_def_cfa_offset 48
; X64-NEXT: .cfi_offset %rbx, -32
; X64-NEXT: .cfi_offset %r14, -24
; X64-NEXT: .cfi_offset %rbp, -16
; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %ebp
; X64-NEXT: movq (%rsp), %rbx
; X64-NEXT: movb (%rax), %al
; X64-NEXT: movzbl %al, %eax
; X64-NEXT: # kill: def $eax killed $eax def $ax
; X64-NEXT: divb (%rax)
; X64-NEXT: movl %eax, %r14d
; X64-NEXT: movq %rbp, %rcx
; X64-NEXT: shlq $62, %rcx
; X64-NEXT: sarq $62, %rcx
; X64-NEXT: xorl %edi, %edi
; X64-NEXT: xorl %esi, %esi
; X64-NEXT: movq %rbx, %rdx
; X64-NEXT: callq __modti3
; X64-NEXT: andl $3, %edx
; X64-NEXT: movb (%rax), %al
; X64-NEXT: testb %al, %al
; X64-NEXT: setne (%rax)
; X64-NEXT: cmpq %rax, %rbx
; X64-NEXT: sbbq %rdx, %rbp
; X64-NEXT: setae %dl
; X64-NEXT: sbbb %cl, %cl
; X64-NEXT: testb %al, %al
; X64-NEXT: setne %bl
; X64-NEXT: negb %dl
; X64-NEXT: cmpb %r14b, %al
; X64-NEXT: setle %al
; X64-NEXT: negb %al
; X64-NEXT: cbtw
; X64-NEXT: idivb %dl
; X64-NEXT: movsbl %ah, %eax
; X64-NEXT: movzbl %al, %eax
; X64-NEXT: andl $1, %eax
; X64-NEXT: shlq $4, %rax
; X64-NEXT: negq %rax
; X64-NEXT: negb %bl
; X64-NEXT: leaq -16(%rsp,%rax), %rax
; X64-NEXT: leaq -{{[0-9]+}}(%rsp), %rax
; X64-NEXT: movq %rax, (%rax)
; X64-NEXT: movl %ecx, %eax
; X64-NEXT: cbtw
; X64-NEXT: idivb %bl
; X64-NEXT: movsbl %ah, %eax
; X64-NEXT: andb $1, %al
; X64-NEXT: movb %al, (%rax)
; X64-NEXT: addq $16, %rsp
; X64-NEXT: .cfi_def_cfa_offset 32
; X64-NEXT: popq %rbx
; X64-NEXT: .cfi_def_cfa_offset 24
; X64-NEXT: popq %r14
; X64-NEXT: .cfi_def_cfa_offset 16
; X64-NEXT: popq %rbp
; X64-NEXT: .cfi_def_cfa_offset 8
; X64-NEXT: movb $0, (%rax)
; X64-NEXT: retq
;
; X86-LABEL: f:
@ -77,75 +22,16 @@ define void @f() {
; X86-NEXT: .cfi_offset %ebp, -8
; X86-NEXT: movl %esp, %ebp
; X86-NEXT: .cfi_def_cfa_register %ebp
; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
; X86-NEXT: andl $-8, %esp
; X86-NEXT: subl $48, %esp
; X86-NEXT: .cfi_offset %esi, -20
; X86-NEXT: .cfi_offset %edi, -16
; X86-NEXT: .cfi_offset %ebx, -12
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
; X86-NEXT: subl $16, %esp
; X86-NEXT: movb (%eax), %al
; X86-NEXT: movb (%eax), %al
; X86-NEXT: movzbl %al, %eax
; X86-NEXT: # kill: def $eax killed $eax def $ax
; X86-NEXT: divb (%eax)
; X86-NEXT: movb %al, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
; X86-NEXT: movl %esi, %eax
; X86-NEXT: shll $30, %eax
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: sarl $30, %ecx
; X86-NEXT: sarl $31, %eax
; X86-NEXT: leal {{[0-9]+}}(%esp), %edx
; X86-NEXT: pushl %eax
; X86-NEXT: pushl %ecx
; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %edi
; X86-NEXT: pushl $0
; X86-NEXT: pushl $0
; X86-NEXT: pushl $0
; X86-NEXT: pushl $0
; X86-NEXT: pushl %edx
; X86-NEXT: calll __modti3
; X86-NEXT: addl $32, %esp
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: andl $3, %eax
; X86-NEXT: xorl %ecx, %ecx
; X86-NEXT: cmpl {{[0-9]+}}(%esp), %edi
; X86-NEXT: sbbl {{[0-9]+}}(%esp), %ebx
; X86-NEXT: sbbl %eax, %esi
; X86-NEXT: sbbl $0, %ecx
; X86-NEXT: setae %dl
; X86-NEXT: sbbb %cl, %cl
; X86-NEXT: testb %al, %al
; X86-NEXT: setne %ch
; X86-NEXT: setne (%eax)
; X86-NEXT: negb %ch
; X86-NEXT: negb %dl
; X86-NEXT: cmpb {{[-0-9]+}}(%e{{[sb]}}p), %al # 1-byte Folded Reload
; X86-NEXT: setle %al
; X86-NEXT: negb %al
; X86-NEXT: cbtw
; X86-NEXT: idivb %dl
; X86-NEXT: movsbl %ah, %eax
; X86-NEXT: movzbl %al, %eax
; X86-NEXT: andl $1, %eax
; X86-NEXT: negl %eax
; X86-NEXT: leal (%eax,%eax,2), %eax
; X86-NEXT: leal -4(%esp,%eax,4), %eax
; X86-NEXT: leal -{{[0-9]+}}(%esp), %eax
; X86-NEXT: movl %eax, (%eax)
; X86-NEXT: movl %ecx, %eax
; X86-NEXT: cbtw
; X86-NEXT: idivb %ch
; X86-NEXT: movsbl %ah, %eax
; X86-NEXT: andb $1, %al
; X86-NEXT: movb %al, (%eax)
; X86-NEXT: leal -12(%ebp), %esp
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: popl %ebx
; X86-NEXT: movb $0, (%eax)
; X86-NEXT: movl %ebp, %esp
; X86-NEXT: popl %ebp
; X86-NEXT: .cfi_def_cfa %esp, 4
; X86-NEXT: retl
@ -177,50 +63,13 @@ BB:
define void @g() {
; X64-LABEL: g:
; X64: # %bb.0: # %BB
; X64-NEXT: movl -{{[0-9]+}}(%rsp), %eax
; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi
; X64-NEXT: shlq $32, %rsi
; X64-NEXT: orq %rax, %rsi
; X64-NEXT: movq %rsi, %rdi
; X64-NEXT: shlq $30, %rdi
; X64-NEXT: sarq $30, %rdi
; X64-NEXT: movb (%rax), %al
; X64-NEXT: movzbl %al, %eax
; X64-NEXT: # kill: def $eax killed $eax def $ax
; X64-NEXT: divb (%rax)
; X64-NEXT: movl %eax, %r8d
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: xorl %edx, %edx
; X64-NEXT: idivq %rdi
; X64-NEXT: movabsq $17179869183, %rax # imm = 0x3FFFFFFFF
; X64-NEXT: andq %rdx, %rax
; X64-NEXT: movb (%rax), %al
; X64-NEXT: testb %al, %al
; X64-NEXT: setne %dil
; X64-NEXT: setne (%rax)
; X64-NEXT: cmpq %rsi, %rax
; X64-NEXT: seta %dl
; X64-NEXT: setbe %cl
; X64-NEXT: negb %cl
; X64-NEXT: cmpb %r8b, %al
; X64-NEXT: setle %al
; X64-NEXT: negb %al
; X64-NEXT: cbtw
; X64-NEXT: idivb %cl
; X64-NEXT: movsbl %ah, %eax
; X64-NEXT: movzbl %al, %eax
; X64-NEXT: andl $1, %eax
; X64-NEXT: shlq $3, %rax
; X64-NEXT: negq %rax
; X64-NEXT: negb %dil
; X64-NEXT: negb %dl
; X64-NEXT: leaq -16(%rsp,%rax), %rax
; X64-NEXT: leaq -{{[0-9]+}}(%rsp), %rax
; X64-NEXT: movq %rax, (%rax)
; X64-NEXT: movl %edx, %eax
; X64-NEXT: cbtw
; X64-NEXT: idivb %dil
; X64-NEXT: movsbl %ah, %eax
; X64-NEXT: andb $1, %al
; X64-NEXT: movb %al, (%rax)
; X64-NEXT: movb $0, (%rax)
; X64-NEXT: retq
;
; X86-LABEL: g:
@ -230,63 +79,16 @@ define void @g() {
; X86-NEXT: .cfi_offset %ebp, -8
; X86-NEXT: movl %esp, %ebp
; X86-NEXT: .cfi_def_cfa_register %ebp
; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
; X86-NEXT: andl $-8, %esp
; X86-NEXT: subl $16, %esp
; X86-NEXT: .cfi_offset %esi, -20
; X86-NEXT: .cfi_offset %edi, -16
; X86-NEXT: .cfi_offset %ebx, -12
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %esi
; X86-NEXT: movl (%esp), %edi
; X86-NEXT: subl $8, %esp
; X86-NEXT: movb (%eax), %al
; X86-NEXT: movb (%eax), %al
; X86-NEXT: movzbl %al, %eax
; X86-NEXT: # kill: def $eax killed $eax def $ax
; X86-NEXT: divb (%eax)
; X86-NEXT: movl %eax, %ebx
; X86-NEXT: movl %esi, %eax
; X86-NEXT: shll $30, %eax
; X86-NEXT: sarl $30, %eax
; X86-NEXT: pushl %eax
; X86-NEXT: pushl %edi
; X86-NEXT: pushl $0
; X86-NEXT: pushl $0
; X86-NEXT: calll __moddi3
; X86-NEXT: addl $16, %esp
; X86-NEXT: andl $3, %edx
; X86-NEXT: testb %al, %al
; X86-NEXT: setne (%eax)
; X86-NEXT: cmpl %eax, %edi
; X86-NEXT: sbbl %edx, %esi
; X86-NEXT: setae %dl
; X86-NEXT: sbbb %cl, %cl
; X86-NEXT: testb %al, %al
; X86-NEXT: setne %ch
; X86-NEXT: negb %dl
; X86-NEXT: cmpb %bl, %al
; X86-NEXT: setle %al
; X86-NEXT: negb %al
; X86-NEXT: cbtw
; X86-NEXT: idivb %dl
; X86-NEXT: movsbl %ah, %eax
; X86-NEXT: movzbl %al, %eax
; X86-NEXT: andl $1, %eax
; X86-NEXT: shll $3, %eax
; X86-NEXT: negl %eax
; X86-NEXT: negb %ch
; X86-NEXT: leal -8(%esp,%eax), %eax
; X86-NEXT: leal -{{[0-9]+}}(%esp), %eax
; X86-NEXT: movl %eax, (%eax)
; X86-NEXT: movl %ecx, %eax
; X86-NEXT: cbtw
; X86-NEXT: idivb %ch
; X86-NEXT: movsbl %ah, %eax
; X86-NEXT: andb $1, %al
; X86-NEXT: movb %al, (%eax)
; X86-NEXT: leal -12(%ebp), %esp
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: popl %ebx
; X86-NEXT: movb $0, (%eax)
; X86-NEXT: movl %ebp, %esp
; X86-NEXT: popl %ebp
; X86-NEXT: .cfi_def_cfa %esp, 4
; X86-NEXT: retl