[X86] Remove a redundant COPY_TO_REGCLASS for VK16 after a KMOVWkr in an isel output pattern.

KMOVWkr already produces VK16, so there's no reason to copy it to VK16 again.

The test changes are presumably due to scheduling that was previously
based on the COPY, which is no longer there.
This commit is contained in:
Craig Topper 2020-08-25 15:16:50 -07:00
parent 22cd6bee4a
commit ba319ac47e
11 changed files with 173 additions and 176 deletions

View File

@ -2967,10 +2967,9 @@ let Predicates = [HasAVX512] in {
def : Pat<(insert_subvector (v16i1 immAllZerosV),
(v1i1 (scalar_to_vector GR8:$src)), (iPTR 0)),
(COPY_TO_REGCLASS
(KMOVWkr (AND32ri8
(INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$src, sub_8bit),
(i32 1))), VK16)>;
(KMOVWkr (AND32ri8
(INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$src, sub_8bit),
(i32 1)))>;
}
// Mask unary operation

View File

@ -1766,39 +1766,39 @@ define <8 x i32> @sext_8i1_8i32(<8 x i32> %a1, <8 x i32> %a2) nounwind {
define i16 @trunc_i32_to_i1(i32 %a) {
; KNL-LABEL: trunc_i32_to_i1:
; KNL: # %bb.0:
; KNL-NEXT: movw $-4, %ax
; KNL-NEXT: kmovw %eax, %k0
; KNL-NEXT: kshiftrw $1, %k0, %k0
; KNL-NEXT: kshiftlw $1, %k0, %k0
; KNL-NEXT: andl $1, %edi
; KNL-NEXT: kmovw %edi, %k1
; KNL-NEXT: korw %k1, %k0, %k0
; KNL-NEXT: kmovw %edi, %k0
; KNL-NEXT: movw $-4, %ax
; KNL-NEXT: kmovw %eax, %k1
; KNL-NEXT: kshiftrw $1, %k1, %k1
; KNL-NEXT: kshiftlw $1, %k1, %k1
; KNL-NEXT: korw %k0, %k1, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: # kill: def $ax killed $ax killed $eax
; KNL-NEXT: retq
;
; SKX-LABEL: trunc_i32_to_i1:
; SKX: # %bb.0:
; SKX-NEXT: movw $-4, %ax
; SKX-NEXT: kmovd %eax, %k0
; SKX-NEXT: kshiftrw $1, %k0, %k0
; SKX-NEXT: kshiftlw $1, %k0, %k0
; SKX-NEXT: andl $1, %edi
; SKX-NEXT: kmovw %edi, %k1
; SKX-NEXT: korw %k1, %k0, %k0
; SKX-NEXT: kmovw %edi, %k0
; SKX-NEXT: movw $-4, %ax
; SKX-NEXT: kmovd %eax, %k1
; SKX-NEXT: kshiftrw $1, %k1, %k1
; SKX-NEXT: kshiftlw $1, %k1, %k1
; SKX-NEXT: korw %k0, %k1, %k0
; SKX-NEXT: kmovd %k0, %eax
; SKX-NEXT: # kill: def $ax killed $ax killed $eax
; SKX-NEXT: retq
;
; AVX512DQNOBW-LABEL: trunc_i32_to_i1:
; AVX512DQNOBW: # %bb.0:
; AVX512DQNOBW-NEXT: movw $-4, %ax
; AVX512DQNOBW-NEXT: kmovw %eax, %k0
; AVX512DQNOBW-NEXT: kshiftrw $1, %k0, %k0
; AVX512DQNOBW-NEXT: kshiftlw $1, %k0, %k0
; AVX512DQNOBW-NEXT: andl $1, %edi
; AVX512DQNOBW-NEXT: kmovw %edi, %k1
; AVX512DQNOBW-NEXT: korw %k1, %k0, %k0
; AVX512DQNOBW-NEXT: kmovw %edi, %k0
; AVX512DQNOBW-NEXT: movw $-4, %ax
; AVX512DQNOBW-NEXT: kmovw %eax, %k1
; AVX512DQNOBW-NEXT: kshiftrw $1, %k1, %k1
; AVX512DQNOBW-NEXT: kshiftlw $1, %k1, %k1
; AVX512DQNOBW-NEXT: korw %k0, %k1, %k0
; AVX512DQNOBW-NEXT: kmovw %k0, %eax
; AVX512DQNOBW-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512DQNOBW-NEXT: retq

View File

@ -2181,32 +2181,32 @@ define i128 @test_insertelement_variable_v128i1(<128 x i8> %a, i8 %b, i32 %index
define void @test_concat_v2i1(<2 x half>* %arg, <2 x half>* %arg1, <2 x half>* %arg2) {
; KNL-LABEL: test_concat_v2i1:
; KNL: ## %bb.0:
; KNL-NEXT: movzwl (%rdi), %eax
; KNL-NEXT: movzwl 2(%rdi), %ecx
; KNL-NEXT: movzwl 2(%rdi), %eax
; KNL-NEXT: movzwl (%rdi), %ecx
; KNL-NEXT: vmovd %ecx, %xmm0
; KNL-NEXT: vcvtph2ps %xmm0, %xmm0
; KNL-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; KNL-NEXT: vucomiss %xmm1, %xmm0
; KNL-NEXT: setb %cl
; KNL-NEXT: andl $1, %ecx
; KNL-NEXT: kmovw %ecx, %k0
; KNL-NEXT: kshiftlw $1, %k0, %k0
; KNL-NEXT: vmovd %eax, %xmm2
; KNL-NEXT: vcvtph2ps %xmm2, %xmm2
; KNL-NEXT: vucomiss %xmm1, %xmm2
; KNL-NEXT: setb %al
; KNL-NEXT: andl $1, %eax
; KNL-NEXT: kmovw %eax, %k1
; KNL-NEXT: korw %k0, %k1, %k0
; KNL-NEXT: kshiftlw $1, %k1, %k1
; KNL-NEXT: korw %k1, %k0, %k0
; KNL-NEXT: vxorps %xmm1, %xmm1, %xmm1
; KNL-NEXT: vucomiss %xmm1, %xmm0
; KNL-NEXT: seta %al
; KNL-NEXT: andl $1, %eax
; KNL-NEXT: kmovw %eax, %k1
; KNL-NEXT: kshiftlw $1, %k1, %k1
; KNL-NEXT: vucomiss %xmm1, %xmm2
; KNL-NEXT: seta %al
; KNL-NEXT: andl $1, %eax
; KNL-NEXT: kmovw %eax, %k2
; KNL-NEXT: korw %k1, %k2, %k1
; KNL-NEXT: kshiftlw $1, %k2, %k2
; KNL-NEXT: korw %k2, %k1, %k1
; KNL-NEXT: kandw %k1, %k0, %k0
; KNL-NEXT: kshiftrw $1, %k0, %k1
; KNL-NEXT: kmovw %k1, %ecx

View File

@ -5157,13 +5157,13 @@ define <64 x i1> @mask64_insert(i32 %a) {
; KNL-LABEL: mask64_insert:
; KNL: ## %bb.0:
; KNL-NEXT: movq %rdi, %rax
; KNL-NEXT: movw $-4, %cx
; KNL-NEXT: kmovw %ecx, %k0
; KNL-NEXT: kshiftrw $1, %k0, %k0
; KNL-NEXT: kshiftlw $1, %k0, %k0
; KNL-NEXT: andl $1, %esi
; KNL-NEXT: kmovw %esi, %k1
; KNL-NEXT: korw %k1, %k0, %k0
; KNL-NEXT: kmovw %esi, %k0
; KNL-NEXT: movw $-4, %cx
; KNL-NEXT: kmovw %ecx, %k1
; KNL-NEXT: kshiftrw $1, %k1, %k1
; KNL-NEXT: kshiftlw $1, %k1, %k1
; KNL-NEXT: korw %k0, %k1, %k0
; KNL-NEXT: kmovw %k0, (%rdi)
; KNL-NEXT: movw $-3, 6(%rdi)
; KNL-NEXT: movl $-131075, 2(%rdi) ## imm = 0xFFFDFFFD
@ -5198,13 +5198,13 @@ define <64 x i1> @mask64_insert(i32 %a) {
; AVX512DQ-LABEL: mask64_insert:
; AVX512DQ: ## %bb.0:
; AVX512DQ-NEXT: movq %rdi, %rax
; AVX512DQ-NEXT: movw $-4, %cx
; AVX512DQ-NEXT: kmovw %ecx, %k0
; AVX512DQ-NEXT: kshiftrw $1, %k0, %k0
; AVX512DQ-NEXT: kshiftlw $1, %k0, %k0
; AVX512DQ-NEXT: andl $1, %esi
; AVX512DQ-NEXT: kmovw %esi, %k1
; AVX512DQ-NEXT: korw %k1, %k0, %k0
; AVX512DQ-NEXT: kmovw %esi, %k0
; AVX512DQ-NEXT: movw $-4, %cx
; AVX512DQ-NEXT: kmovw %ecx, %k1
; AVX512DQ-NEXT: kshiftrw $1, %k1, %k1
; AVX512DQ-NEXT: kshiftlw $1, %k1, %k1
; AVX512DQ-NEXT: korw %k0, %k1, %k0
; AVX512DQ-NEXT: kmovw %k0, (%rdi)
; AVX512DQ-NEXT: movw $-3, 6(%rdi)
; AVX512DQ-NEXT: movl $-131075, 2(%rdi) ## imm = 0xFFFDFFFD

View File

@ -1434,8 +1434,8 @@ define <4 x i32> @zext_bool_logic(<4 x i64> %cond1, <4 x i64> %cond2, <4 x i32>
define void @half_vec_compare(<2 x half>* %x, <2 x i8>* %y) {
; KNL-LABEL: half_vec_compare:
; KNL: ## %bb.0: ## %entry
; KNL-NEXT: movzwl (%rdi), %eax ## encoding: [0x0f,0xb7,0x07]
; KNL-NEXT: movzwl 2(%rdi), %ecx ## encoding: [0x0f,0xb7,0x4f,0x02]
; KNL-NEXT: movzwl 2(%rdi), %eax ## encoding: [0x0f,0xb7,0x47,0x02]
; KNL-NEXT: movzwl (%rdi), %ecx ## encoding: [0x0f,0xb7,0x0f]
; KNL-NEXT: vmovd %ecx, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
; KNL-NEXT: vcvtph2ps %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x13,0xc0]
; KNL-NEXT: vxorps %xmm1, %xmm1, %xmm1 ## encoding: [0xc5,0xf0,0x57,0xc9]
@ -1443,17 +1443,17 @@ define void @half_vec_compare(<2 x half>* %x, <2 x i8>* %y) {
; KNL-NEXT: setp %cl ## encoding: [0x0f,0x9a,0xc1]
; KNL-NEXT: setne %dl ## encoding: [0x0f,0x95,0xc2]
; KNL-NEXT: orb %cl, %dl ## encoding: [0x08,0xca]
; KNL-NEXT: andl $1, %edx ## encoding: [0x83,0xe2,0x01]
; KNL-NEXT: kmovw %edx, %k0 ## encoding: [0xc5,0xf8,0x92,0xc2]
; KNL-NEXT: kshiftlw $1, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc0,0x01]
; KNL-NEXT: vmovd %eax, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc0]
; KNL-NEXT: vcvtph2ps %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x13,0xc0]
; KNL-NEXT: vucomiss %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2e,0xc1]
; KNL-NEXT: setp %al ## encoding: [0x0f,0x9a,0xc0]
; KNL-NEXT: setne %cl ## encoding: [0x0f,0x95,0xc1]
; KNL-NEXT: orb %al, %cl ## encoding: [0x08,0xc1]
; KNL-NEXT: andl $1, %ecx ## encoding: [0x83,0xe1,0x01]
; KNL-NEXT: kmovw %ecx, %k1 ## encoding: [0xc5,0xf8,0x92,0xc9]
; KNL-NEXT: korw %k0, %k1, %k1 ## encoding: [0xc5,0xf4,0x45,0xc8]
; KNL-NEXT: kshiftlw $1, %k1, %k1 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc9,0x01]
; KNL-NEXT: korw %k1, %k0, %k1 ## encoding: [0xc5,0xfc,0x45,0xc9]
; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf3,0x7d,0xc9,0x25,0xc0,0xff]
; KNL-NEXT: vpmovdw %zmm0, %ymm0 ## encoding: [0x62,0xf2,0x7e,0x48,0x33,0xc0]
; KNL-NEXT: vpacksswb %xmm0, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0x63,0xc0]
@ -1465,8 +1465,8 @@ define void @half_vec_compare(<2 x half>* %x, <2 x i8>* %y) {
;
; AVX512BW-LABEL: half_vec_compare:
; AVX512BW: ## %bb.0: ## %entry
; AVX512BW-NEXT: movzwl (%rdi), %eax ## encoding: [0x0f,0xb7,0x07]
; AVX512BW-NEXT: movzwl 2(%rdi), %ecx ## encoding: [0x0f,0xb7,0x4f,0x02]
; AVX512BW-NEXT: movzwl 2(%rdi), %eax ## encoding: [0x0f,0xb7,0x47,0x02]
; AVX512BW-NEXT: movzwl (%rdi), %ecx ## encoding: [0x0f,0xb7,0x0f]
; AVX512BW-NEXT: vmovd %ecx, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
; AVX512BW-NEXT: vcvtph2ps %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x13,0xc0]
; AVX512BW-NEXT: vxorps %xmm1, %xmm1, %xmm1 ## encoding: [0xc5,0xf0,0x57,0xc9]
@ -1474,17 +1474,17 @@ define void @half_vec_compare(<2 x half>* %x, <2 x i8>* %y) {
; AVX512BW-NEXT: setp %cl ## encoding: [0x0f,0x9a,0xc1]
; AVX512BW-NEXT: setne %dl ## encoding: [0x0f,0x95,0xc2]
; AVX512BW-NEXT: orb %cl, %dl ## encoding: [0x08,0xca]
; AVX512BW-NEXT: kmovd %edx, %k0 ## encoding: [0xc5,0xfb,0x92,0xc2]
; AVX512BW-NEXT: kshiftlw $1, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc0,0x01]
; AVX512BW-NEXT: andl $1, %edx ## encoding: [0x83,0xe2,0x01]
; AVX512BW-NEXT: kmovw %edx, %k0 ## encoding: [0xc5,0xf8,0x92,0xc2]
; AVX512BW-NEXT: vmovd %eax, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc0]
; AVX512BW-NEXT: vcvtph2ps %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x13,0xc0]
; AVX512BW-NEXT: vucomiss %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2e,0xc1]
; AVX512BW-NEXT: setp %al ## encoding: [0x0f,0x9a,0xc0]
; AVX512BW-NEXT: setne %cl ## encoding: [0x0f,0x95,0xc1]
; AVX512BW-NEXT: orb %al, %cl ## encoding: [0x08,0xc1]
; AVX512BW-NEXT: andl $1, %ecx ## encoding: [0x83,0xe1,0x01]
; AVX512BW-NEXT: kmovw %ecx, %k1 ## encoding: [0xc5,0xf8,0x92,0xc9]
; AVX512BW-NEXT: korw %k0, %k1, %k0 ## encoding: [0xc5,0xf4,0x45,0xc0]
; AVX512BW-NEXT: kmovd %ecx, %k1 ## encoding: [0xc5,0xfb,0x92,0xc9]
; AVX512BW-NEXT: kshiftlw $1, %k1, %k1 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc9,0x01]
; AVX512BW-NEXT: korw %k1, %k0, %k0 ## encoding: [0xc5,0xfc,0x45,0xc1]
; AVX512BW-NEXT: vpmovm2w %k0, %zmm0 ## encoding: [0x62,0xf2,0xfe,0x48,0x28,0xc0]
; AVX512BW-NEXT: vpacksswb %xmm0, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0x63,0xc0]
; AVX512BW-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xdb,0x05,A,A,A,A]

View File

@ -1372,48 +1372,48 @@ define <2 x i32> @saddo_v2i128(<2 x i128> %a0, <2 x i128> %a1, <2 x i128>* %p2)
;
; AVX512-LABEL: saddo_v2i128:
; AVX512: # %bb.0:
; AVX512-NEXT: pushq %rbp
; AVX512-NEXT: pushq %r14
; AVX512-NEXT: pushq %rbx
; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %r11
; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %r10
; AVX512-NEXT: testq %r9, %r9
; AVX512-NEXT: setns %al
; AVX512-NEXT: testq %rsi, %rsi
; AVX512-NEXT: setns %bl
; AVX512-NEXT: cmpb %al, %bl
; AVX512-NEXT: sete %bpl
; AVX512-NEXT: addq %r8, %rdi
; AVX512-NEXT: adcq %r9, %rsi
; AVX512-NEXT: setns %al
; AVX512-NEXT: cmpb %al, %bl
; AVX512-NEXT: setne %al
; AVX512-NEXT: andb %bpl, %al
; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %r11
; AVX512-NEXT: addq {{[0-9]+}}(%rsp), %rdx
; AVX512-NEXT: movq %rcx, %rbp
; AVX512-NEXT: adcq %r10, %rbp
; AVX512-NEXT: movq %rcx, %r14
; AVX512-NEXT: adcq %r11, %r14
; AVX512-NEXT: setns %bl
; AVX512-NEXT: testq %rcx, %rcx
; AVX512-NEXT: setns %cl
; AVX512-NEXT: cmpb %bl, %cl
; AVX512-NEXT: setne %r8b
; AVX512-NEXT: testq %r10, %r10
; AVX512-NEXT: setne %bl
; AVX512-NEXT: testq %r11, %r11
; AVX512-NEXT: setns %al
; AVX512-NEXT: cmpb %al, %cl
; AVX512-NEXT: sete %al
; AVX512-NEXT: andb %bl, %al
; AVX512-NEXT: kmovd %eax, %k0
; AVX512-NEXT: testq %r9, %r9
; AVX512-NEXT: setns %al
; AVX512-NEXT: testq %rsi, %rsi
; AVX512-NEXT: setns %cl
; AVX512-NEXT: cmpb %al, %cl
; AVX512-NEXT: sete %al
; AVX512-NEXT: addq %r8, %rdi
; AVX512-NEXT: adcq %r9, %rsi
; AVX512-NEXT: setns %bl
; AVX512-NEXT: cmpb %bl, %cl
; AVX512-NEXT: sete %cl
; AVX512-NEXT: andb %r8b, %cl
; AVX512-NEXT: kmovd %ecx, %k0
; AVX512-NEXT: setne %cl
; AVX512-NEXT: andb %al, %cl
; AVX512-NEXT: andl $1, %ecx
; AVX512-NEXT: kmovw %ecx, %k1
; AVX512-NEXT: kshiftlw $1, %k0, %k0
; AVX512-NEXT: andl $1, %eax
; AVX512-NEXT: kmovw %eax, %k1
; AVX512-NEXT: korw %k0, %k1, %k1
; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
; AVX512-NEXT: movq %rdx, 16(%r11)
; AVX512-NEXT: movq %rdi, (%r11)
; AVX512-NEXT: movq %rbp, 24(%r11)
; AVX512-NEXT: movq %rsi, 8(%r11)
; AVX512-NEXT: movq %rdx, 16(%r10)
; AVX512-NEXT: movq %rdi, (%r10)
; AVX512-NEXT: movq %r14, 24(%r10)
; AVX512-NEXT: movq %rsi, 8(%r10)
; AVX512-NEXT: popq %rbx
; AVX512-NEXT: popq %rbp
; AVX512-NEXT: popq %r14
; AVX512-NEXT: retq
%t = call {<2 x i128>, <2 x i1>} @llvm.sadd.with.overflow.v2i128(<2 x i128> %a0, <2 x i128> %a1)
%val = extractvalue {<2 x i128>, <2 x i1>} %t, 0

View File

@ -3942,39 +3942,39 @@ define <2 x i32> @smulo_v2i128(<2 x i128> %a0, <2 x i128> %a1, <2 x i128>* %p2)
; AVX512-NEXT: pushq %rbx
; AVX512-NEXT: subq $24, %rsp
; AVX512-NEXT: movq %r8, %rax
; AVX512-NEXT: movq %rcx, %r15
; AVX512-NEXT: movq %rcx, %r14
; AVX512-NEXT: movq %rdx, %rbx
; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %r15
; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %r12
; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %r13
; AVX512-NEXT: movq $0, {{[0-9]+}}(%rsp)
; AVX512-NEXT: leaq {{[0-9]+}}(%rsp), %r8
; AVX512-NEXT: movq %rax, %rdx
; AVX512-NEXT: movq %r9, %rcx
; AVX512-NEXT: callq __muloti4
; AVX512-NEXT: movq %rax, %r14
; AVX512-NEXT: movq %rax, %r13
; AVX512-NEXT: movq %rdx, %rbp
; AVX512-NEXT: movq $0, {{[0-9]+}}(%rsp)
; AVX512-NEXT: leaq {{[0-9]+}}(%rsp), %r8
; AVX512-NEXT: movq %rbx, %rdi
; AVX512-NEXT: movq %r15, %rsi
; AVX512-NEXT: movq %r14, %rsi
; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %rdx
; AVX512-NEXT: movq %r13, %rcx
; AVX512-NEXT: movq %r12, %rcx
; AVX512-NEXT: callq __muloti4
; AVX512-NEXT: cmpq $0, {{[0-9]+}}(%rsp)
; AVX512-NEXT: setne %cl
; AVX512-NEXT: kmovd %ecx, %k0
; AVX512-NEXT: cmpq $0, {{[0-9]+}}(%rsp)
; AVX512-NEXT: setne %cl
; AVX512-NEXT: kshiftlw $1, %k0, %k0
; AVX512-NEXT: andl $1, %ecx
; AVX512-NEXT: kmovw %ecx, %k1
; AVX512-NEXT: kshiftlw $1, %k0, %k0
; AVX512-NEXT: korw %k0, %k1, %k1
; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
; AVX512-NEXT: movq %rdx, 24(%r12)
; AVX512-NEXT: movq %rax, 16(%r12)
; AVX512-NEXT: movq %rbp, 8(%r12)
; AVX512-NEXT: movq %r14, (%r12)
; AVX512-NEXT: movq %rdx, 24(%r15)
; AVX512-NEXT: movq %rax, 16(%r15)
; AVX512-NEXT: movq %rbp, 8(%r15)
; AVX512-NEXT: movq %r13, (%r15)
; AVX512-NEXT: addq $24, %rsp
; AVX512-NEXT: popq %rbx
; AVX512-NEXT: popq %r12

View File

@ -1381,48 +1381,48 @@ define <2 x i32> @ssubo_v2i128(<2 x i128> %a0, <2 x i128> %a1, <2 x i128>* %p2)
;
; AVX512-LABEL: ssubo_v2i128:
; AVX512: # %bb.0:
; AVX512-NEXT: pushq %rbp
; AVX512-NEXT: pushq %r14
; AVX512-NEXT: pushq %rbx
; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %r11
; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %r10
; AVX512-NEXT: testq %r9, %r9
; AVX512-NEXT: setns %al
; AVX512-NEXT: testq %rsi, %rsi
; AVX512-NEXT: setns %bl
; AVX512-NEXT: cmpb %al, %bl
; AVX512-NEXT: setne %bpl
; AVX512-NEXT: subq %r8, %rdi
; AVX512-NEXT: sbbq %r9, %rsi
; AVX512-NEXT: setns %al
; AVX512-NEXT: cmpb %al, %bl
; AVX512-NEXT: setne %al
; AVX512-NEXT: andb %bpl, %al
; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %r11
; AVX512-NEXT: subq {{[0-9]+}}(%rsp), %rdx
; AVX512-NEXT: movq %rcx, %rbp
; AVX512-NEXT: sbbq %r10, %rbp
; AVX512-NEXT: movq %rcx, %r14
; AVX512-NEXT: sbbq %r11, %r14
; AVX512-NEXT: setns %bl
; AVX512-NEXT: testq %rcx, %rcx
; AVX512-NEXT: setns %cl
; AVX512-NEXT: cmpb %bl, %cl
; AVX512-NEXT: setne %r8b
; AVX512-NEXT: testq %r10, %r10
; AVX512-NEXT: setne %bl
; AVX512-NEXT: testq %r11, %r11
; AVX512-NEXT: setns %al
; AVX512-NEXT: cmpb %al, %cl
; AVX512-NEXT: setne %al
; AVX512-NEXT: andb %bl, %al
; AVX512-NEXT: kmovd %eax, %k0
; AVX512-NEXT: testq %r9, %r9
; AVX512-NEXT: setns %al
; AVX512-NEXT: testq %rsi, %rsi
; AVX512-NEXT: setns %cl
; AVX512-NEXT: cmpb %al, %cl
; AVX512-NEXT: setne %al
; AVX512-NEXT: subq %r8, %rdi
; AVX512-NEXT: sbbq %r9, %rsi
; AVX512-NEXT: setns %bl
; AVX512-NEXT: cmpb %bl, %cl
; AVX512-NEXT: setne %cl
; AVX512-NEXT: andb %r8b, %cl
; AVX512-NEXT: kmovd %ecx, %k0
; AVX512-NEXT: andb %al, %cl
; AVX512-NEXT: andl $1, %ecx
; AVX512-NEXT: kmovw %ecx, %k1
; AVX512-NEXT: kshiftlw $1, %k0, %k0
; AVX512-NEXT: andl $1, %eax
; AVX512-NEXT: kmovw %eax, %k1
; AVX512-NEXT: korw %k0, %k1, %k1
; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
; AVX512-NEXT: movq %rdx, 16(%r11)
; AVX512-NEXT: movq %rdi, (%r11)
; AVX512-NEXT: movq %rbp, 24(%r11)
; AVX512-NEXT: movq %rsi, 8(%r11)
; AVX512-NEXT: movq %rdx, 16(%r10)
; AVX512-NEXT: movq %rdi, (%r10)
; AVX512-NEXT: movq %r14, 24(%r10)
; AVX512-NEXT: movq %rsi, 8(%r10)
; AVX512-NEXT: popq %rbx
; AVX512-NEXT: popq %rbp
; AVX512-NEXT: popq %r14
; AVX512-NEXT: retq
%t = call {<2 x i128>, <2 x i1>} @llvm.ssub.with.overflow.v2i128(<2 x i128> %a0, <2 x i128> %a1)
%val = extractvalue {<2 x i128>, <2 x i1>} %t, 0

View File

@ -1282,16 +1282,16 @@ define <2 x i32> @uaddo_v2i128(<2 x i128> %a0, <2 x i128> %a1, <2 x i128>* %p2)
; AVX512-LABEL: uaddo_v2i128:
; AVX512: # %bb.0:
; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %r10
; AVX512-NEXT: addq %r8, %rdi
; AVX512-NEXT: adcq %r9, %rsi
; AVX512-NEXT: setb %r8b
; AVX512-NEXT: addq {{[0-9]+}}(%rsp), %rdx
; AVX512-NEXT: adcq {{[0-9]+}}(%rsp), %rcx
; AVX512-NEXT: setb %al
; AVX512-NEXT: kmovd %eax, %k0
; AVX512-NEXT: addq %r8, %rdi
; AVX512-NEXT: adcq %r9, %rsi
; AVX512-NEXT: setb %al
; AVX512-NEXT: andl $1, %eax
; AVX512-NEXT: kmovw %eax, %k1
; AVX512-NEXT: kshiftlw $1, %k0, %k0
; AVX512-NEXT: andl $1, %r8d
; AVX512-NEXT: kmovw %r8d, %k1
; AVX512-NEXT: korw %k0, %k1, %k1
; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}

View File

@ -3689,68 +3689,66 @@ define <2 x i32> @umulo_v2i128(<2 x i128> %a0, <2 x i128> %a1, <2 x i128>* %p2)
; AVX512-NEXT: pushq %r13
; AVX512-NEXT: pushq %r12
; AVX512-NEXT: pushq %rbx
; AVX512-NEXT: movq %r9, %r10
; AVX512-NEXT: movq %rcx, %r9
; AVX512-NEXT: movq %rdx, %r11
; AVX512-NEXT: movq %rsi, %rax
; AVX512-NEXT: movq %rdi, %rsi
; AVX512-NEXT: movq %rcx, %rax
; AVX512-NEXT: movq %rdx, %r12
; AVX512-NEXT: movq %rdi, %r11
; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %r14
; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %r15
; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %r12
; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %r10
; AVX512-NEXT: testq %r10, %r10
; AVX512-NEXT: setne %dl
; AVX512-NEXT: testq %rax, %rax
; AVX512-NEXT: setne %bl
; AVX512-NEXT: andb %dl, %bl
; AVX512-NEXT: mulq %r8
; AVX512-NEXT: movq %rax, %r13
; AVX512-NEXT: testq %rcx, %rcx
; AVX512-NEXT: setne %r13b
; AVX512-NEXT: andb %dl, %r13b
; AVX512-NEXT: mulq %r15
; AVX512-NEXT: movq %rax, %rdi
; AVX512-NEXT: seto %bpl
; AVX512-NEXT: movq %r10, %rax
; AVX512-NEXT: mulq %rdi
; AVX512-NEXT: movq %rax, %rdi
; AVX512-NEXT: seto %cl
; AVX512-NEXT: orb %bpl, %cl
; AVX512-NEXT: addq %r13, %rdi
; AVX512-NEXT: movq %rsi, %rax
; AVX512-NEXT: mulq %r8
; AVX512-NEXT: movq %rax, %r8
; AVX512-NEXT: movq %rdx, %r10
; AVX512-NEXT: addq %rdi, %r10
; AVX512-NEXT: setb %sil
; AVX512-NEXT: orb %cl, %sil
; AVX512-NEXT: orb %bl, %sil
; AVX512-NEXT: testq %r12, %r12
; AVX512-NEXT: setne %al
; AVX512-NEXT: testq %r9, %r9
; AVX512-NEXT: setne %bpl
; AVX512-NEXT: andb %al, %bpl
; AVX512-NEXT: movq %r9, %rax
; AVX512-NEXT: mulq %r15
; AVX512-NEXT: movq %rax, %rdi
; AVX512-NEXT: seto %r9b
; AVX512-NEXT: movq %r12, %rax
; AVX512-NEXT: mulq %r11
; AVX512-NEXT: mulq %r12
; AVX512-NEXT: movq %rax, %rbx
; AVX512-NEXT: seto %cl
; AVX512-NEXT: orb %r9b, %cl
; AVX512-NEXT: orb %bpl, %cl
; AVX512-NEXT: addq %rdi, %rbx
; AVX512-NEXT: movq %r11, %rax
; AVX512-NEXT: movq %r12, %rax
; AVX512-NEXT: mulq %r15
; AVX512-NEXT: addq %rbx, %rdx
; AVX512-NEXT: setb %dil
; AVX512-NEXT: orb %cl, %dil
; AVX512-NEXT: orb %bpl, %dil
; AVX512-NEXT: kmovd %edi, %k0
; AVX512-NEXT: kshiftlw $1, %k0, %k0
; AVX512-NEXT: movq %rax, %r10
; AVX512-NEXT: movq %rdx, %r15
; AVX512-NEXT: addq %rbx, %r15
; AVX512-NEXT: setb %al
; AVX512-NEXT: orb %cl, %al
; AVX512-NEXT: orb %r13b, %al
; AVX512-NEXT: kmovd %eax, %k0
; AVX512-NEXT: testq %r9, %r9
; AVX512-NEXT: setne %al
; AVX512-NEXT: testq %rsi, %rsi
; AVX512-NEXT: setne %cl
; AVX512-NEXT: andb %al, %cl
; AVX512-NEXT: movq %rsi, %rax
; AVX512-NEXT: mulq %r8
; AVX512-NEXT: movq %rax, %rsi
; AVX512-NEXT: seto %bpl
; AVX512-NEXT: movq %r9, %rax
; AVX512-NEXT: mulq %r11
; AVX512-NEXT: movq %rax, %rdi
; AVX512-NEXT: seto %bl
; AVX512-NEXT: orb %bpl, %bl
; AVX512-NEXT: addq %rsi, %rdi
; AVX512-NEXT: movq %r11, %rax
; AVX512-NEXT: mulq %r8
; AVX512-NEXT: addq %rdi, %rdx
; AVX512-NEXT: setb %sil
; AVX512-NEXT: orb %bl, %sil
; AVX512-NEXT: orb %cl, %sil
; AVX512-NEXT: andl $1, %esi
; AVX512-NEXT: kmovw %esi, %k1
; AVX512-NEXT: kshiftlw $1, %k0, %k0
; AVX512-NEXT: korw %k0, %k1, %k1
; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
; AVX512-NEXT: movq %rax, 16(%r14)
; AVX512-NEXT: movq %r8, (%r14)
; AVX512-NEXT: movq %rdx, 24(%r14)
; AVX512-NEXT: movq %r10, 8(%r14)
; AVX512-NEXT: movq %r10, 16(%r14)
; AVX512-NEXT: movq %rax, (%r14)
; AVX512-NEXT: movq %r15, 24(%r14)
; AVX512-NEXT: movq %rdx, 8(%r14)
; AVX512-NEXT: popq %rbx
; AVX512-NEXT: popq %r12
; AVX512-NEXT: popq %r13

View File

@ -1329,16 +1329,16 @@ define <2 x i32> @usubo_v2i128(<2 x i128> %a0, <2 x i128> %a1, <2 x i128>* %p2)
; AVX512-LABEL: usubo_v2i128:
; AVX512: # %bb.0:
; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %r10
; AVX512-NEXT: subq %r8, %rdi
; AVX512-NEXT: sbbq %r9, %rsi
; AVX512-NEXT: setb %r8b
; AVX512-NEXT: subq {{[0-9]+}}(%rsp), %rdx
; AVX512-NEXT: sbbq {{[0-9]+}}(%rsp), %rcx
; AVX512-NEXT: setb %al
; AVX512-NEXT: kmovd %eax, %k0
; AVX512-NEXT: subq %r8, %rdi
; AVX512-NEXT: sbbq %r9, %rsi
; AVX512-NEXT: setb %al
; AVX512-NEXT: andl $1, %eax
; AVX512-NEXT: kmovw %eax, %k1
; AVX512-NEXT: kshiftlw $1, %k0, %k0
; AVX512-NEXT: andl $1, %r8d
; AVX512-NEXT: kmovw %r8d, %k1
; AVX512-NEXT: korw %k0, %k1, %k1
; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}