forked from OSchip/llvm-project
[X86] Remove a redundant COPY_TO_REGCLASS for VK16 after a KMOVWkr in an isel output pattern.
KMOVWkr already produces a VK16 result, so there is no reason to copy it to VK16 again. The test changes are presumably because instruction scheduling was previously influenced by the COPY that is no longer there.
This commit is contained in:
parent
22cd6bee4a
commit
ba319ac47e
|
@ -2967,10 +2967,9 @@ let Predicates = [HasAVX512] in {
|
|||
|
||||
def : Pat<(insert_subvector (v16i1 immAllZerosV),
|
||||
(v1i1 (scalar_to_vector GR8:$src)), (iPTR 0)),
|
||||
(COPY_TO_REGCLASS
|
||||
(KMOVWkr (AND32ri8
|
||||
(INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$src, sub_8bit),
|
||||
(i32 1))), VK16)>;
|
||||
(KMOVWkr (AND32ri8
|
||||
(INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$src, sub_8bit),
|
||||
(i32 1)))>;
|
||||
}
|
||||
|
||||
// Mask unary operation
|
||||
|
|
|
@ -1766,39 +1766,39 @@ define <8 x i32> @sext_8i1_8i32(<8 x i32> %a1, <8 x i32> %a2) nounwind {
|
|||
define i16 @trunc_i32_to_i1(i32 %a) {
|
||||
; KNL-LABEL: trunc_i32_to_i1:
|
||||
; KNL: # %bb.0:
|
||||
; KNL-NEXT: movw $-4, %ax
|
||||
; KNL-NEXT: kmovw %eax, %k0
|
||||
; KNL-NEXT: kshiftrw $1, %k0, %k0
|
||||
; KNL-NEXT: kshiftlw $1, %k0, %k0
|
||||
; KNL-NEXT: andl $1, %edi
|
||||
; KNL-NEXT: kmovw %edi, %k1
|
||||
; KNL-NEXT: korw %k1, %k0, %k0
|
||||
; KNL-NEXT: kmovw %edi, %k0
|
||||
; KNL-NEXT: movw $-4, %ax
|
||||
; KNL-NEXT: kmovw %eax, %k1
|
||||
; KNL-NEXT: kshiftrw $1, %k1, %k1
|
||||
; KNL-NEXT: kshiftlw $1, %k1, %k1
|
||||
; KNL-NEXT: korw %k0, %k1, %k0
|
||||
; KNL-NEXT: kmovw %k0, %eax
|
||||
; KNL-NEXT: # kill: def $ax killed $ax killed $eax
|
||||
; KNL-NEXT: retq
|
||||
;
|
||||
; SKX-LABEL: trunc_i32_to_i1:
|
||||
; SKX: # %bb.0:
|
||||
; SKX-NEXT: movw $-4, %ax
|
||||
; SKX-NEXT: kmovd %eax, %k0
|
||||
; SKX-NEXT: kshiftrw $1, %k0, %k0
|
||||
; SKX-NEXT: kshiftlw $1, %k0, %k0
|
||||
; SKX-NEXT: andl $1, %edi
|
||||
; SKX-NEXT: kmovw %edi, %k1
|
||||
; SKX-NEXT: korw %k1, %k0, %k0
|
||||
; SKX-NEXT: kmovw %edi, %k0
|
||||
; SKX-NEXT: movw $-4, %ax
|
||||
; SKX-NEXT: kmovd %eax, %k1
|
||||
; SKX-NEXT: kshiftrw $1, %k1, %k1
|
||||
; SKX-NEXT: kshiftlw $1, %k1, %k1
|
||||
; SKX-NEXT: korw %k0, %k1, %k0
|
||||
; SKX-NEXT: kmovd %k0, %eax
|
||||
; SKX-NEXT: # kill: def $ax killed $ax killed $eax
|
||||
; SKX-NEXT: retq
|
||||
;
|
||||
; AVX512DQNOBW-LABEL: trunc_i32_to_i1:
|
||||
; AVX512DQNOBW: # %bb.0:
|
||||
; AVX512DQNOBW-NEXT: movw $-4, %ax
|
||||
; AVX512DQNOBW-NEXT: kmovw %eax, %k0
|
||||
; AVX512DQNOBW-NEXT: kshiftrw $1, %k0, %k0
|
||||
; AVX512DQNOBW-NEXT: kshiftlw $1, %k0, %k0
|
||||
; AVX512DQNOBW-NEXT: andl $1, %edi
|
||||
; AVX512DQNOBW-NEXT: kmovw %edi, %k1
|
||||
; AVX512DQNOBW-NEXT: korw %k1, %k0, %k0
|
||||
; AVX512DQNOBW-NEXT: kmovw %edi, %k0
|
||||
; AVX512DQNOBW-NEXT: movw $-4, %ax
|
||||
; AVX512DQNOBW-NEXT: kmovw %eax, %k1
|
||||
; AVX512DQNOBW-NEXT: kshiftrw $1, %k1, %k1
|
||||
; AVX512DQNOBW-NEXT: kshiftlw $1, %k1, %k1
|
||||
; AVX512DQNOBW-NEXT: korw %k0, %k1, %k0
|
||||
; AVX512DQNOBW-NEXT: kmovw %k0, %eax
|
||||
; AVX512DQNOBW-NEXT: # kill: def $ax killed $ax killed $eax
|
||||
; AVX512DQNOBW-NEXT: retq
|
||||
|
|
|
@ -2181,32 +2181,32 @@ define i128 @test_insertelement_variable_v128i1(<128 x i8> %a, i8 %b, i32 %index
|
|||
define void @test_concat_v2i1(<2 x half>* %arg, <2 x half>* %arg1, <2 x half>* %arg2) {
|
||||
; KNL-LABEL: test_concat_v2i1:
|
||||
; KNL: ## %bb.0:
|
||||
; KNL-NEXT: movzwl (%rdi), %eax
|
||||
; KNL-NEXT: movzwl 2(%rdi), %ecx
|
||||
; KNL-NEXT: movzwl 2(%rdi), %eax
|
||||
; KNL-NEXT: movzwl (%rdi), %ecx
|
||||
; KNL-NEXT: vmovd %ecx, %xmm0
|
||||
; KNL-NEXT: vcvtph2ps %xmm0, %xmm0
|
||||
; KNL-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
|
||||
; KNL-NEXT: vucomiss %xmm1, %xmm0
|
||||
; KNL-NEXT: setb %cl
|
||||
; KNL-NEXT: andl $1, %ecx
|
||||
; KNL-NEXT: kmovw %ecx, %k0
|
||||
; KNL-NEXT: kshiftlw $1, %k0, %k0
|
||||
; KNL-NEXT: vmovd %eax, %xmm2
|
||||
; KNL-NEXT: vcvtph2ps %xmm2, %xmm2
|
||||
; KNL-NEXT: vucomiss %xmm1, %xmm2
|
||||
; KNL-NEXT: setb %al
|
||||
; KNL-NEXT: andl $1, %eax
|
||||
; KNL-NEXT: kmovw %eax, %k1
|
||||
; KNL-NEXT: korw %k0, %k1, %k0
|
||||
; KNL-NEXT: kshiftlw $1, %k1, %k1
|
||||
; KNL-NEXT: korw %k1, %k0, %k0
|
||||
; KNL-NEXT: vxorps %xmm1, %xmm1, %xmm1
|
||||
; KNL-NEXT: vucomiss %xmm1, %xmm0
|
||||
; KNL-NEXT: seta %al
|
||||
; KNL-NEXT: andl $1, %eax
|
||||
; KNL-NEXT: kmovw %eax, %k1
|
||||
; KNL-NEXT: kshiftlw $1, %k1, %k1
|
||||
; KNL-NEXT: vucomiss %xmm1, %xmm2
|
||||
; KNL-NEXT: seta %al
|
||||
; KNL-NEXT: andl $1, %eax
|
||||
; KNL-NEXT: kmovw %eax, %k2
|
||||
; KNL-NEXT: korw %k1, %k2, %k1
|
||||
; KNL-NEXT: kshiftlw $1, %k2, %k2
|
||||
; KNL-NEXT: korw %k2, %k1, %k1
|
||||
; KNL-NEXT: kandw %k1, %k0, %k0
|
||||
; KNL-NEXT: kshiftrw $1, %k0, %k1
|
||||
; KNL-NEXT: kmovw %k1, %ecx
|
||||
|
|
|
@ -5157,13 +5157,13 @@ define <64 x i1> @mask64_insert(i32 %a) {
|
|||
; KNL-LABEL: mask64_insert:
|
||||
; KNL: ## %bb.0:
|
||||
; KNL-NEXT: movq %rdi, %rax
|
||||
; KNL-NEXT: movw $-4, %cx
|
||||
; KNL-NEXT: kmovw %ecx, %k0
|
||||
; KNL-NEXT: kshiftrw $1, %k0, %k0
|
||||
; KNL-NEXT: kshiftlw $1, %k0, %k0
|
||||
; KNL-NEXT: andl $1, %esi
|
||||
; KNL-NEXT: kmovw %esi, %k1
|
||||
; KNL-NEXT: korw %k1, %k0, %k0
|
||||
; KNL-NEXT: kmovw %esi, %k0
|
||||
; KNL-NEXT: movw $-4, %cx
|
||||
; KNL-NEXT: kmovw %ecx, %k1
|
||||
; KNL-NEXT: kshiftrw $1, %k1, %k1
|
||||
; KNL-NEXT: kshiftlw $1, %k1, %k1
|
||||
; KNL-NEXT: korw %k0, %k1, %k0
|
||||
; KNL-NEXT: kmovw %k0, (%rdi)
|
||||
; KNL-NEXT: movw $-3, 6(%rdi)
|
||||
; KNL-NEXT: movl $-131075, 2(%rdi) ## imm = 0xFFFDFFFD
|
||||
|
@ -5198,13 +5198,13 @@ define <64 x i1> @mask64_insert(i32 %a) {
|
|||
; AVX512DQ-LABEL: mask64_insert:
|
||||
; AVX512DQ: ## %bb.0:
|
||||
; AVX512DQ-NEXT: movq %rdi, %rax
|
||||
; AVX512DQ-NEXT: movw $-4, %cx
|
||||
; AVX512DQ-NEXT: kmovw %ecx, %k0
|
||||
; AVX512DQ-NEXT: kshiftrw $1, %k0, %k0
|
||||
; AVX512DQ-NEXT: kshiftlw $1, %k0, %k0
|
||||
; AVX512DQ-NEXT: andl $1, %esi
|
||||
; AVX512DQ-NEXT: kmovw %esi, %k1
|
||||
; AVX512DQ-NEXT: korw %k1, %k0, %k0
|
||||
; AVX512DQ-NEXT: kmovw %esi, %k0
|
||||
; AVX512DQ-NEXT: movw $-4, %cx
|
||||
; AVX512DQ-NEXT: kmovw %ecx, %k1
|
||||
; AVX512DQ-NEXT: kshiftrw $1, %k1, %k1
|
||||
; AVX512DQ-NEXT: kshiftlw $1, %k1, %k1
|
||||
; AVX512DQ-NEXT: korw %k0, %k1, %k0
|
||||
; AVX512DQ-NEXT: kmovw %k0, (%rdi)
|
||||
; AVX512DQ-NEXT: movw $-3, 6(%rdi)
|
||||
; AVX512DQ-NEXT: movl $-131075, 2(%rdi) ## imm = 0xFFFDFFFD
|
||||
|
|
|
@ -1434,8 +1434,8 @@ define <4 x i32> @zext_bool_logic(<4 x i64> %cond1, <4 x i64> %cond2, <4 x i32>
|
|||
define void @half_vec_compare(<2 x half>* %x, <2 x i8>* %y) {
|
||||
; KNL-LABEL: half_vec_compare:
|
||||
; KNL: ## %bb.0: ## %entry
|
||||
; KNL-NEXT: movzwl (%rdi), %eax ## encoding: [0x0f,0xb7,0x07]
|
||||
; KNL-NEXT: movzwl 2(%rdi), %ecx ## encoding: [0x0f,0xb7,0x4f,0x02]
|
||||
; KNL-NEXT: movzwl 2(%rdi), %eax ## encoding: [0x0f,0xb7,0x47,0x02]
|
||||
; KNL-NEXT: movzwl (%rdi), %ecx ## encoding: [0x0f,0xb7,0x0f]
|
||||
; KNL-NEXT: vmovd %ecx, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
|
||||
; KNL-NEXT: vcvtph2ps %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x13,0xc0]
|
||||
; KNL-NEXT: vxorps %xmm1, %xmm1, %xmm1 ## encoding: [0xc5,0xf0,0x57,0xc9]
|
||||
|
@ -1443,17 +1443,17 @@ define void @half_vec_compare(<2 x half>* %x, <2 x i8>* %y) {
|
|||
; KNL-NEXT: setp %cl ## encoding: [0x0f,0x9a,0xc1]
|
||||
; KNL-NEXT: setne %dl ## encoding: [0x0f,0x95,0xc2]
|
||||
; KNL-NEXT: orb %cl, %dl ## encoding: [0x08,0xca]
|
||||
; KNL-NEXT: andl $1, %edx ## encoding: [0x83,0xe2,0x01]
|
||||
; KNL-NEXT: kmovw %edx, %k0 ## encoding: [0xc5,0xf8,0x92,0xc2]
|
||||
; KNL-NEXT: kshiftlw $1, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc0,0x01]
|
||||
; KNL-NEXT: vmovd %eax, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc0]
|
||||
; KNL-NEXT: vcvtph2ps %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x13,0xc0]
|
||||
; KNL-NEXT: vucomiss %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2e,0xc1]
|
||||
; KNL-NEXT: setp %al ## encoding: [0x0f,0x9a,0xc0]
|
||||
; KNL-NEXT: setne %cl ## encoding: [0x0f,0x95,0xc1]
|
||||
; KNL-NEXT: orb %al, %cl ## encoding: [0x08,0xc1]
|
||||
; KNL-NEXT: andl $1, %ecx ## encoding: [0x83,0xe1,0x01]
|
||||
; KNL-NEXT: kmovw %ecx, %k1 ## encoding: [0xc5,0xf8,0x92,0xc9]
|
||||
; KNL-NEXT: korw %k0, %k1, %k1 ## encoding: [0xc5,0xf4,0x45,0xc8]
|
||||
; KNL-NEXT: kshiftlw $1, %k1, %k1 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc9,0x01]
|
||||
; KNL-NEXT: korw %k1, %k0, %k1 ## encoding: [0xc5,0xfc,0x45,0xc9]
|
||||
; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf3,0x7d,0xc9,0x25,0xc0,0xff]
|
||||
; KNL-NEXT: vpmovdw %zmm0, %ymm0 ## encoding: [0x62,0xf2,0x7e,0x48,0x33,0xc0]
|
||||
; KNL-NEXT: vpacksswb %xmm0, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0x63,0xc0]
|
||||
|
@ -1465,8 +1465,8 @@ define void @half_vec_compare(<2 x half>* %x, <2 x i8>* %y) {
|
|||
;
|
||||
; AVX512BW-LABEL: half_vec_compare:
|
||||
; AVX512BW: ## %bb.0: ## %entry
|
||||
; AVX512BW-NEXT: movzwl (%rdi), %eax ## encoding: [0x0f,0xb7,0x07]
|
||||
; AVX512BW-NEXT: movzwl 2(%rdi), %ecx ## encoding: [0x0f,0xb7,0x4f,0x02]
|
||||
; AVX512BW-NEXT: movzwl 2(%rdi), %eax ## encoding: [0x0f,0xb7,0x47,0x02]
|
||||
; AVX512BW-NEXT: movzwl (%rdi), %ecx ## encoding: [0x0f,0xb7,0x0f]
|
||||
; AVX512BW-NEXT: vmovd %ecx, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
|
||||
; AVX512BW-NEXT: vcvtph2ps %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x13,0xc0]
|
||||
; AVX512BW-NEXT: vxorps %xmm1, %xmm1, %xmm1 ## encoding: [0xc5,0xf0,0x57,0xc9]
|
||||
|
@ -1474,17 +1474,17 @@ define void @half_vec_compare(<2 x half>* %x, <2 x i8>* %y) {
|
|||
; AVX512BW-NEXT: setp %cl ## encoding: [0x0f,0x9a,0xc1]
|
||||
; AVX512BW-NEXT: setne %dl ## encoding: [0x0f,0x95,0xc2]
|
||||
; AVX512BW-NEXT: orb %cl, %dl ## encoding: [0x08,0xca]
|
||||
; AVX512BW-NEXT: kmovd %edx, %k0 ## encoding: [0xc5,0xfb,0x92,0xc2]
|
||||
; AVX512BW-NEXT: kshiftlw $1, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc0,0x01]
|
||||
; AVX512BW-NEXT: andl $1, %edx ## encoding: [0x83,0xe2,0x01]
|
||||
; AVX512BW-NEXT: kmovw %edx, %k0 ## encoding: [0xc5,0xf8,0x92,0xc2]
|
||||
; AVX512BW-NEXT: vmovd %eax, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc0]
|
||||
; AVX512BW-NEXT: vcvtph2ps %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x13,0xc0]
|
||||
; AVX512BW-NEXT: vucomiss %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2e,0xc1]
|
||||
; AVX512BW-NEXT: setp %al ## encoding: [0x0f,0x9a,0xc0]
|
||||
; AVX512BW-NEXT: setne %cl ## encoding: [0x0f,0x95,0xc1]
|
||||
; AVX512BW-NEXT: orb %al, %cl ## encoding: [0x08,0xc1]
|
||||
; AVX512BW-NEXT: andl $1, %ecx ## encoding: [0x83,0xe1,0x01]
|
||||
; AVX512BW-NEXT: kmovw %ecx, %k1 ## encoding: [0xc5,0xf8,0x92,0xc9]
|
||||
; AVX512BW-NEXT: korw %k0, %k1, %k0 ## encoding: [0xc5,0xf4,0x45,0xc0]
|
||||
; AVX512BW-NEXT: kmovd %ecx, %k1 ## encoding: [0xc5,0xfb,0x92,0xc9]
|
||||
; AVX512BW-NEXT: kshiftlw $1, %k1, %k1 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc9,0x01]
|
||||
; AVX512BW-NEXT: korw %k1, %k0, %k0 ## encoding: [0xc5,0xfc,0x45,0xc1]
|
||||
; AVX512BW-NEXT: vpmovm2w %k0, %zmm0 ## encoding: [0x62,0xf2,0xfe,0x48,0x28,0xc0]
|
||||
; AVX512BW-NEXT: vpacksswb %xmm0, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0x63,0xc0]
|
||||
; AVX512BW-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xdb,0x05,A,A,A,A]
|
||||
|
|
|
@ -1372,48 +1372,48 @@ define <2 x i32> @saddo_v2i128(<2 x i128> %a0, <2 x i128> %a1, <2 x i128>* %p2)
|
|||
;
|
||||
; AVX512-LABEL: saddo_v2i128:
|
||||
; AVX512: # %bb.0:
|
||||
; AVX512-NEXT: pushq %rbp
|
||||
; AVX512-NEXT: pushq %r14
|
||||
; AVX512-NEXT: pushq %rbx
|
||||
; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %r11
|
||||
; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %r10
|
||||
; AVX512-NEXT: testq %r9, %r9
|
||||
; AVX512-NEXT: setns %al
|
||||
; AVX512-NEXT: testq %rsi, %rsi
|
||||
; AVX512-NEXT: setns %bl
|
||||
; AVX512-NEXT: cmpb %al, %bl
|
||||
; AVX512-NEXT: sete %bpl
|
||||
; AVX512-NEXT: addq %r8, %rdi
|
||||
; AVX512-NEXT: adcq %r9, %rsi
|
||||
; AVX512-NEXT: setns %al
|
||||
; AVX512-NEXT: cmpb %al, %bl
|
||||
; AVX512-NEXT: setne %al
|
||||
; AVX512-NEXT: andb %bpl, %al
|
||||
; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %r11
|
||||
; AVX512-NEXT: addq {{[0-9]+}}(%rsp), %rdx
|
||||
; AVX512-NEXT: movq %rcx, %rbp
|
||||
; AVX512-NEXT: adcq %r10, %rbp
|
||||
; AVX512-NEXT: movq %rcx, %r14
|
||||
; AVX512-NEXT: adcq %r11, %r14
|
||||
; AVX512-NEXT: setns %bl
|
||||
; AVX512-NEXT: testq %rcx, %rcx
|
||||
; AVX512-NEXT: setns %cl
|
||||
; AVX512-NEXT: cmpb %bl, %cl
|
||||
; AVX512-NEXT: setne %r8b
|
||||
; AVX512-NEXT: testq %r10, %r10
|
||||
; AVX512-NEXT: setne %bl
|
||||
; AVX512-NEXT: testq %r11, %r11
|
||||
; AVX512-NEXT: setns %al
|
||||
; AVX512-NEXT: cmpb %al, %cl
|
||||
; AVX512-NEXT: sete %al
|
||||
; AVX512-NEXT: andb %bl, %al
|
||||
; AVX512-NEXT: kmovd %eax, %k0
|
||||
; AVX512-NEXT: testq %r9, %r9
|
||||
; AVX512-NEXT: setns %al
|
||||
; AVX512-NEXT: testq %rsi, %rsi
|
||||
; AVX512-NEXT: setns %cl
|
||||
; AVX512-NEXT: cmpb %al, %cl
|
||||
; AVX512-NEXT: sete %al
|
||||
; AVX512-NEXT: addq %r8, %rdi
|
||||
; AVX512-NEXT: adcq %r9, %rsi
|
||||
; AVX512-NEXT: setns %bl
|
||||
; AVX512-NEXT: cmpb %bl, %cl
|
||||
; AVX512-NEXT: sete %cl
|
||||
; AVX512-NEXT: andb %r8b, %cl
|
||||
; AVX512-NEXT: kmovd %ecx, %k0
|
||||
; AVX512-NEXT: setne %cl
|
||||
; AVX512-NEXT: andb %al, %cl
|
||||
; AVX512-NEXT: andl $1, %ecx
|
||||
; AVX512-NEXT: kmovw %ecx, %k1
|
||||
; AVX512-NEXT: kshiftlw $1, %k0, %k0
|
||||
; AVX512-NEXT: andl $1, %eax
|
||||
; AVX512-NEXT: kmovw %eax, %k1
|
||||
; AVX512-NEXT: korw %k0, %k1, %k1
|
||||
; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
|
||||
; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
|
||||
; AVX512-NEXT: movq %rdx, 16(%r11)
|
||||
; AVX512-NEXT: movq %rdi, (%r11)
|
||||
; AVX512-NEXT: movq %rbp, 24(%r11)
|
||||
; AVX512-NEXT: movq %rsi, 8(%r11)
|
||||
; AVX512-NEXT: movq %rdx, 16(%r10)
|
||||
; AVX512-NEXT: movq %rdi, (%r10)
|
||||
; AVX512-NEXT: movq %r14, 24(%r10)
|
||||
; AVX512-NEXT: movq %rsi, 8(%r10)
|
||||
; AVX512-NEXT: popq %rbx
|
||||
; AVX512-NEXT: popq %rbp
|
||||
; AVX512-NEXT: popq %r14
|
||||
; AVX512-NEXT: retq
|
||||
%t = call {<2 x i128>, <2 x i1>} @llvm.sadd.with.overflow.v2i128(<2 x i128> %a0, <2 x i128> %a1)
|
||||
%val = extractvalue {<2 x i128>, <2 x i1>} %t, 0
|
||||
|
|
|
@ -3942,39 +3942,39 @@ define <2 x i32> @smulo_v2i128(<2 x i128> %a0, <2 x i128> %a1, <2 x i128>* %p2)
|
|||
; AVX512-NEXT: pushq %rbx
|
||||
; AVX512-NEXT: subq $24, %rsp
|
||||
; AVX512-NEXT: movq %r8, %rax
|
||||
; AVX512-NEXT: movq %rcx, %r15
|
||||
; AVX512-NEXT: movq %rcx, %r14
|
||||
; AVX512-NEXT: movq %rdx, %rbx
|
||||
; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %r15
|
||||
; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %r12
|
||||
; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %r13
|
||||
; AVX512-NEXT: movq $0, {{[0-9]+}}(%rsp)
|
||||
; AVX512-NEXT: leaq {{[0-9]+}}(%rsp), %r8
|
||||
; AVX512-NEXT: movq %rax, %rdx
|
||||
; AVX512-NEXT: movq %r9, %rcx
|
||||
; AVX512-NEXT: callq __muloti4
|
||||
; AVX512-NEXT: movq %rax, %r14
|
||||
; AVX512-NEXT: movq %rax, %r13
|
||||
; AVX512-NEXT: movq %rdx, %rbp
|
||||
; AVX512-NEXT: movq $0, {{[0-9]+}}(%rsp)
|
||||
; AVX512-NEXT: leaq {{[0-9]+}}(%rsp), %r8
|
||||
; AVX512-NEXT: movq %rbx, %rdi
|
||||
; AVX512-NEXT: movq %r15, %rsi
|
||||
; AVX512-NEXT: movq %r14, %rsi
|
||||
; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %rdx
|
||||
; AVX512-NEXT: movq %r13, %rcx
|
||||
; AVX512-NEXT: movq %r12, %rcx
|
||||
; AVX512-NEXT: callq __muloti4
|
||||
; AVX512-NEXT: cmpq $0, {{[0-9]+}}(%rsp)
|
||||
; AVX512-NEXT: setne %cl
|
||||
; AVX512-NEXT: kmovd %ecx, %k0
|
||||
; AVX512-NEXT: cmpq $0, {{[0-9]+}}(%rsp)
|
||||
; AVX512-NEXT: setne %cl
|
||||
; AVX512-NEXT: kshiftlw $1, %k0, %k0
|
||||
; AVX512-NEXT: andl $1, %ecx
|
||||
; AVX512-NEXT: kmovw %ecx, %k1
|
||||
; AVX512-NEXT: kshiftlw $1, %k0, %k0
|
||||
; AVX512-NEXT: korw %k0, %k1, %k1
|
||||
; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
|
||||
; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
|
||||
; AVX512-NEXT: movq %rdx, 24(%r12)
|
||||
; AVX512-NEXT: movq %rax, 16(%r12)
|
||||
; AVX512-NEXT: movq %rbp, 8(%r12)
|
||||
; AVX512-NEXT: movq %r14, (%r12)
|
||||
; AVX512-NEXT: movq %rdx, 24(%r15)
|
||||
; AVX512-NEXT: movq %rax, 16(%r15)
|
||||
; AVX512-NEXT: movq %rbp, 8(%r15)
|
||||
; AVX512-NEXT: movq %r13, (%r15)
|
||||
; AVX512-NEXT: addq $24, %rsp
|
||||
; AVX512-NEXT: popq %rbx
|
||||
; AVX512-NEXT: popq %r12
|
||||
|
|
|
@ -1381,48 +1381,48 @@ define <2 x i32> @ssubo_v2i128(<2 x i128> %a0, <2 x i128> %a1, <2 x i128>* %p2)
|
|||
;
|
||||
; AVX512-LABEL: ssubo_v2i128:
|
||||
; AVX512: # %bb.0:
|
||||
; AVX512-NEXT: pushq %rbp
|
||||
; AVX512-NEXT: pushq %r14
|
||||
; AVX512-NEXT: pushq %rbx
|
||||
; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %r11
|
||||
; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %r10
|
||||
; AVX512-NEXT: testq %r9, %r9
|
||||
; AVX512-NEXT: setns %al
|
||||
; AVX512-NEXT: testq %rsi, %rsi
|
||||
; AVX512-NEXT: setns %bl
|
||||
; AVX512-NEXT: cmpb %al, %bl
|
||||
; AVX512-NEXT: setne %bpl
|
||||
; AVX512-NEXT: subq %r8, %rdi
|
||||
; AVX512-NEXT: sbbq %r9, %rsi
|
||||
; AVX512-NEXT: setns %al
|
||||
; AVX512-NEXT: cmpb %al, %bl
|
||||
; AVX512-NEXT: setne %al
|
||||
; AVX512-NEXT: andb %bpl, %al
|
||||
; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %r11
|
||||
; AVX512-NEXT: subq {{[0-9]+}}(%rsp), %rdx
|
||||
; AVX512-NEXT: movq %rcx, %rbp
|
||||
; AVX512-NEXT: sbbq %r10, %rbp
|
||||
; AVX512-NEXT: movq %rcx, %r14
|
||||
; AVX512-NEXT: sbbq %r11, %r14
|
||||
; AVX512-NEXT: setns %bl
|
||||
; AVX512-NEXT: testq %rcx, %rcx
|
||||
; AVX512-NEXT: setns %cl
|
||||
; AVX512-NEXT: cmpb %bl, %cl
|
||||
; AVX512-NEXT: setne %r8b
|
||||
; AVX512-NEXT: testq %r10, %r10
|
||||
; AVX512-NEXT: setne %bl
|
||||
; AVX512-NEXT: testq %r11, %r11
|
||||
; AVX512-NEXT: setns %al
|
||||
; AVX512-NEXT: cmpb %al, %cl
|
||||
; AVX512-NEXT: setne %al
|
||||
; AVX512-NEXT: andb %bl, %al
|
||||
; AVX512-NEXT: kmovd %eax, %k0
|
||||
; AVX512-NEXT: testq %r9, %r9
|
||||
; AVX512-NEXT: setns %al
|
||||
; AVX512-NEXT: testq %rsi, %rsi
|
||||
; AVX512-NEXT: setns %cl
|
||||
; AVX512-NEXT: cmpb %al, %cl
|
||||
; AVX512-NEXT: setne %al
|
||||
; AVX512-NEXT: subq %r8, %rdi
|
||||
; AVX512-NEXT: sbbq %r9, %rsi
|
||||
; AVX512-NEXT: setns %bl
|
||||
; AVX512-NEXT: cmpb %bl, %cl
|
||||
; AVX512-NEXT: setne %cl
|
||||
; AVX512-NEXT: andb %r8b, %cl
|
||||
; AVX512-NEXT: kmovd %ecx, %k0
|
||||
; AVX512-NEXT: andb %al, %cl
|
||||
; AVX512-NEXT: andl $1, %ecx
|
||||
; AVX512-NEXT: kmovw %ecx, %k1
|
||||
; AVX512-NEXT: kshiftlw $1, %k0, %k0
|
||||
; AVX512-NEXT: andl $1, %eax
|
||||
; AVX512-NEXT: kmovw %eax, %k1
|
||||
; AVX512-NEXT: korw %k0, %k1, %k1
|
||||
; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
|
||||
; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
|
||||
; AVX512-NEXT: movq %rdx, 16(%r11)
|
||||
; AVX512-NEXT: movq %rdi, (%r11)
|
||||
; AVX512-NEXT: movq %rbp, 24(%r11)
|
||||
; AVX512-NEXT: movq %rsi, 8(%r11)
|
||||
; AVX512-NEXT: movq %rdx, 16(%r10)
|
||||
; AVX512-NEXT: movq %rdi, (%r10)
|
||||
; AVX512-NEXT: movq %r14, 24(%r10)
|
||||
; AVX512-NEXT: movq %rsi, 8(%r10)
|
||||
; AVX512-NEXT: popq %rbx
|
||||
; AVX512-NEXT: popq %rbp
|
||||
; AVX512-NEXT: popq %r14
|
||||
; AVX512-NEXT: retq
|
||||
%t = call {<2 x i128>, <2 x i1>} @llvm.ssub.with.overflow.v2i128(<2 x i128> %a0, <2 x i128> %a1)
|
||||
%val = extractvalue {<2 x i128>, <2 x i1>} %t, 0
|
||||
|
|
|
@ -1282,16 +1282,16 @@ define <2 x i32> @uaddo_v2i128(<2 x i128> %a0, <2 x i128> %a1, <2 x i128>* %p2)
|
|||
; AVX512-LABEL: uaddo_v2i128:
|
||||
; AVX512: # %bb.0:
|
||||
; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %r10
|
||||
; AVX512-NEXT: addq %r8, %rdi
|
||||
; AVX512-NEXT: adcq %r9, %rsi
|
||||
; AVX512-NEXT: setb %r8b
|
||||
; AVX512-NEXT: addq {{[0-9]+}}(%rsp), %rdx
|
||||
; AVX512-NEXT: adcq {{[0-9]+}}(%rsp), %rcx
|
||||
; AVX512-NEXT: setb %al
|
||||
; AVX512-NEXT: kmovd %eax, %k0
|
||||
; AVX512-NEXT: addq %r8, %rdi
|
||||
; AVX512-NEXT: adcq %r9, %rsi
|
||||
; AVX512-NEXT: setb %al
|
||||
; AVX512-NEXT: andl $1, %eax
|
||||
; AVX512-NEXT: kmovw %eax, %k1
|
||||
; AVX512-NEXT: kshiftlw $1, %k0, %k0
|
||||
; AVX512-NEXT: andl $1, %r8d
|
||||
; AVX512-NEXT: kmovw %r8d, %k1
|
||||
; AVX512-NEXT: korw %k0, %k1, %k1
|
||||
; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
|
||||
; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
|
||||
|
|
|
@ -3689,68 +3689,66 @@ define <2 x i32> @umulo_v2i128(<2 x i128> %a0, <2 x i128> %a1, <2 x i128>* %p2)
|
|||
; AVX512-NEXT: pushq %r13
|
||||
; AVX512-NEXT: pushq %r12
|
||||
; AVX512-NEXT: pushq %rbx
|
||||
; AVX512-NEXT: movq %r9, %r10
|
||||
; AVX512-NEXT: movq %rcx, %r9
|
||||
; AVX512-NEXT: movq %rdx, %r11
|
||||
; AVX512-NEXT: movq %rsi, %rax
|
||||
; AVX512-NEXT: movq %rdi, %rsi
|
||||
; AVX512-NEXT: movq %rcx, %rax
|
||||
; AVX512-NEXT: movq %rdx, %r12
|
||||
; AVX512-NEXT: movq %rdi, %r11
|
||||
; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %r14
|
||||
; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %r15
|
||||
; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %r12
|
||||
; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %r10
|
||||
; AVX512-NEXT: testq %r10, %r10
|
||||
; AVX512-NEXT: setne %dl
|
||||
; AVX512-NEXT: testq %rax, %rax
|
||||
; AVX512-NEXT: setne %bl
|
||||
; AVX512-NEXT: andb %dl, %bl
|
||||
; AVX512-NEXT: mulq %r8
|
||||
; AVX512-NEXT: movq %rax, %r13
|
||||
; AVX512-NEXT: testq %rcx, %rcx
|
||||
; AVX512-NEXT: setne %r13b
|
||||
; AVX512-NEXT: andb %dl, %r13b
|
||||
; AVX512-NEXT: mulq %r15
|
||||
; AVX512-NEXT: movq %rax, %rdi
|
||||
; AVX512-NEXT: seto %bpl
|
||||
; AVX512-NEXT: movq %r10, %rax
|
||||
; AVX512-NEXT: mulq %rdi
|
||||
; AVX512-NEXT: movq %rax, %rdi
|
||||
; AVX512-NEXT: seto %cl
|
||||
; AVX512-NEXT: orb %bpl, %cl
|
||||
; AVX512-NEXT: addq %r13, %rdi
|
||||
; AVX512-NEXT: movq %rsi, %rax
|
||||
; AVX512-NEXT: mulq %r8
|
||||
; AVX512-NEXT: movq %rax, %r8
|
||||
; AVX512-NEXT: movq %rdx, %r10
|
||||
; AVX512-NEXT: addq %rdi, %r10
|
||||
; AVX512-NEXT: setb %sil
|
||||
; AVX512-NEXT: orb %cl, %sil
|
||||
; AVX512-NEXT: orb %bl, %sil
|
||||
; AVX512-NEXT: testq %r12, %r12
|
||||
; AVX512-NEXT: setne %al
|
||||
; AVX512-NEXT: testq %r9, %r9
|
||||
; AVX512-NEXT: setne %bpl
|
||||
; AVX512-NEXT: andb %al, %bpl
|
||||
; AVX512-NEXT: movq %r9, %rax
|
||||
; AVX512-NEXT: mulq %r15
|
||||
; AVX512-NEXT: movq %rax, %rdi
|
||||
; AVX512-NEXT: seto %r9b
|
||||
; AVX512-NEXT: movq %r12, %rax
|
||||
; AVX512-NEXT: mulq %r11
|
||||
; AVX512-NEXT: mulq %r12
|
||||
; AVX512-NEXT: movq %rax, %rbx
|
||||
; AVX512-NEXT: seto %cl
|
||||
; AVX512-NEXT: orb %r9b, %cl
|
||||
; AVX512-NEXT: orb %bpl, %cl
|
||||
; AVX512-NEXT: addq %rdi, %rbx
|
||||
; AVX512-NEXT: movq %r11, %rax
|
||||
; AVX512-NEXT: movq %r12, %rax
|
||||
; AVX512-NEXT: mulq %r15
|
||||
; AVX512-NEXT: addq %rbx, %rdx
|
||||
; AVX512-NEXT: setb %dil
|
||||
; AVX512-NEXT: orb %cl, %dil
|
||||
; AVX512-NEXT: orb %bpl, %dil
|
||||
; AVX512-NEXT: kmovd %edi, %k0
|
||||
; AVX512-NEXT: kshiftlw $1, %k0, %k0
|
||||
; AVX512-NEXT: movq %rax, %r10
|
||||
; AVX512-NEXT: movq %rdx, %r15
|
||||
; AVX512-NEXT: addq %rbx, %r15
|
||||
; AVX512-NEXT: setb %al
|
||||
; AVX512-NEXT: orb %cl, %al
|
||||
; AVX512-NEXT: orb %r13b, %al
|
||||
; AVX512-NEXT: kmovd %eax, %k0
|
||||
; AVX512-NEXT: testq %r9, %r9
|
||||
; AVX512-NEXT: setne %al
|
||||
; AVX512-NEXT: testq %rsi, %rsi
|
||||
; AVX512-NEXT: setne %cl
|
||||
; AVX512-NEXT: andb %al, %cl
|
||||
; AVX512-NEXT: movq %rsi, %rax
|
||||
; AVX512-NEXT: mulq %r8
|
||||
; AVX512-NEXT: movq %rax, %rsi
|
||||
; AVX512-NEXT: seto %bpl
|
||||
; AVX512-NEXT: movq %r9, %rax
|
||||
; AVX512-NEXT: mulq %r11
|
||||
; AVX512-NEXT: movq %rax, %rdi
|
||||
; AVX512-NEXT: seto %bl
|
||||
; AVX512-NEXT: orb %bpl, %bl
|
||||
; AVX512-NEXT: addq %rsi, %rdi
|
||||
; AVX512-NEXT: movq %r11, %rax
|
||||
; AVX512-NEXT: mulq %r8
|
||||
; AVX512-NEXT: addq %rdi, %rdx
|
||||
; AVX512-NEXT: setb %sil
|
||||
; AVX512-NEXT: orb %bl, %sil
|
||||
; AVX512-NEXT: orb %cl, %sil
|
||||
; AVX512-NEXT: andl $1, %esi
|
||||
; AVX512-NEXT: kmovw %esi, %k1
|
||||
; AVX512-NEXT: kshiftlw $1, %k0, %k0
|
||||
; AVX512-NEXT: korw %k0, %k1, %k1
|
||||
; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
|
||||
; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
|
||||
; AVX512-NEXT: movq %rax, 16(%r14)
|
||||
; AVX512-NEXT: movq %r8, (%r14)
|
||||
; AVX512-NEXT: movq %rdx, 24(%r14)
|
||||
; AVX512-NEXT: movq %r10, 8(%r14)
|
||||
; AVX512-NEXT: movq %r10, 16(%r14)
|
||||
; AVX512-NEXT: movq %rax, (%r14)
|
||||
; AVX512-NEXT: movq %r15, 24(%r14)
|
||||
; AVX512-NEXT: movq %rdx, 8(%r14)
|
||||
; AVX512-NEXT: popq %rbx
|
||||
; AVX512-NEXT: popq %r12
|
||||
; AVX512-NEXT: popq %r13
|
||||
|
|
|
@ -1329,16 +1329,16 @@ define <2 x i32> @usubo_v2i128(<2 x i128> %a0, <2 x i128> %a1, <2 x i128>* %p2)
|
|||
; AVX512-LABEL: usubo_v2i128:
|
||||
; AVX512: # %bb.0:
|
||||
; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %r10
|
||||
; AVX512-NEXT: subq %r8, %rdi
|
||||
; AVX512-NEXT: sbbq %r9, %rsi
|
||||
; AVX512-NEXT: setb %r8b
|
||||
; AVX512-NEXT: subq {{[0-9]+}}(%rsp), %rdx
|
||||
; AVX512-NEXT: sbbq {{[0-9]+}}(%rsp), %rcx
|
||||
; AVX512-NEXT: setb %al
|
||||
; AVX512-NEXT: kmovd %eax, %k0
|
||||
; AVX512-NEXT: subq %r8, %rdi
|
||||
; AVX512-NEXT: sbbq %r9, %rsi
|
||||
; AVX512-NEXT: setb %al
|
||||
; AVX512-NEXT: andl $1, %eax
|
||||
; AVX512-NEXT: kmovw %eax, %k1
|
||||
; AVX512-NEXT: kshiftlw $1, %k0, %k0
|
||||
; AVX512-NEXT: andl $1, %r8d
|
||||
; AVX512-NEXT: kmovw %r8d, %k1
|
||||
; AVX512-NEXT: korw %k0, %k1, %k1
|
||||
; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
|
||||
; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
|
||||
|
|
Loading…
Reference in New Issue