llvm-project/llvm/test/CodeGen/X86/avx512vl-vec-masked-cmp.ll

60249 lines
2.4 MiB

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512vl,+avx512dq | FileCheck %s --check-prefix=CHECK --check-prefix=VLX
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=CHECK --check-prefix=NoVLX
define zeroext i32 @test_vpcmpeqb_v16i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqb_v16i1_v32i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpeqb %xmm1, %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqb_v16i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi0:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi2:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %r13
; NoVLX-NEXT: pushq %r12
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: .Lcfi3:
; NoVLX-NEXT: .cfi_offset %rbx, -56
; NoVLX-NEXT: .Lcfi4:
; NoVLX-NEXT: .cfi_offset %r12, -48
; NoVLX-NEXT: .Lcfi5:
; NoVLX-NEXT: .cfi_offset %r13, -40
; NoVLX-NEXT: .Lcfi6:
; NoVLX-NEXT: .cfi_offset %r14, -32
; NoVLX-NEXT: .Lcfi7:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: leaq -40(%rbp), %rsp
; NoVLX-NEXT: popq %rbx
; NoVLX-NEXT: popq %r12
; NoVLX-NEXT: popq %r13
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <16 x i8>
%1 = bitcast <2 x i64> %__b to <16 x i8>
%2 = icmp eq <16 x i8> %0, %1
%3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
define zeroext i32 @test_vpcmpeqb_v16i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqb_v16i1_v32i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpeqb (%rdi), %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqb_v16i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi8:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi9:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi10:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %r13
; NoVLX-NEXT: pushq %r12
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: .Lcfi11:
; NoVLX-NEXT: .cfi_offset %rbx, -56
; NoVLX-NEXT: .Lcfi12:
; NoVLX-NEXT: .cfi_offset %r12, -48
; NoVLX-NEXT: .Lcfi13:
; NoVLX-NEXT: .cfi_offset %r13, -40
; NoVLX-NEXT: .Lcfi14:
; NoVLX-NEXT: .cfi_offset %r14, -32
; NoVLX-NEXT: .Lcfi15:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpeqb (%rdi), %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: leaq -40(%rbp), %rsp
; NoVLX-NEXT: popq %rbx
; NoVLX-NEXT: popq %r12
; NoVLX-NEXT: popq %r13
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <16 x i8>
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <16 x i8>
%2 = icmp eq <16 x i8> %0, %1
%3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
define zeroext i32 @test_masked_vpcmpeqb_v16i1_v32i1_mask(i16 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqb_v16i1_v32i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqb %xmm1, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqb_v16i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi16:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi17:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi18:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %r13
; NoVLX-NEXT: pushq %r12
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: .Lcfi19:
; NoVLX-NEXT: .cfi_offset %rbx, -56
; NoVLX-NEXT: .Lcfi20:
; NoVLX-NEXT: .cfi_offset %r12, -48
; NoVLX-NEXT: .Lcfi21:
; NoVLX-NEXT: .cfi_offset %r13, -40
; NoVLX-NEXT: .Lcfi22:
; NoVLX-NEXT: .cfi_offset %r14, -32
; NoVLX-NEXT: .Lcfi23:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: leaq -40(%rbp), %rsp
; NoVLX-NEXT: popq %rbx
; NoVLX-NEXT: popq %r12
; NoVLX-NEXT: popq %r13
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <16 x i8>
%1 = bitcast <2 x i64> %__b to <16 x i8>
%2 = icmp eq <16 x i8> %0, %1
%3 = bitcast i16 %__u to <16 x i1>
%4 = and <16 x i1> %2, %3
%5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
define zeroext i32 @test_masked_vpcmpeqb_v16i1_v32i1_mask_mem(i16 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqb_v16i1_v32i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqb (%rsi), %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqb_v16i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi24:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi25:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi26:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %r13
; NoVLX-NEXT: pushq %r12
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: .Lcfi27:
; NoVLX-NEXT: .cfi_offset %rbx, -56
; NoVLX-NEXT: .Lcfi28:
; NoVLX-NEXT: .cfi_offset %r12, -48
; NoVLX-NEXT: .Lcfi29:
; NoVLX-NEXT: .cfi_offset %r13, -40
; NoVLX-NEXT: .Lcfi30:
; NoVLX-NEXT: .cfi_offset %r14, -32
; NoVLX-NEXT: .Lcfi31:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpeqb (%rsi), %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: leaq -40(%rbp), %rsp
; NoVLX-NEXT: popq %rbx
; NoVLX-NEXT: popq %r12
; NoVLX-NEXT: popq %r13
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <16 x i8>
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <16 x i8>
%2 = icmp eq <16 x i8> %0, %1
%3 = bitcast i16 %__u to <16 x i1>
%4 = and <16 x i1> %2, %3
%5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
define zeroext i64 @test_vpcmpeqb_v16i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqb_v16i1_v64i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpeqb %xmm1, %xmm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqb_v16i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi32:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi33:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi34:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %r13
; NoVLX-NEXT: pushq %r12
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: .Lcfi35:
; NoVLX-NEXT: .cfi_offset %rbx, -56
; NoVLX-NEXT: .Lcfi36:
; NoVLX-NEXT: .cfi_offset %r12, -48
; NoVLX-NEXT: .Lcfi37:
; NoVLX-NEXT: .cfi_offset %r13, -40
; NoVLX-NEXT: .Lcfi38:
; NoVLX-NEXT: .cfi_offset %r14, -32
; NoVLX-NEXT: .Lcfi39:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: leaq -40(%rbp), %rsp
; NoVLX-NEXT: popq %rbx
; NoVLX-NEXT: popq %r12
; NoVLX-NEXT: popq %r13
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <16 x i8>
%1 = bitcast <2 x i64> %__b to <16 x i8>
%2 = icmp eq <16 x i8> %0, %1
%3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
define zeroext i64 @test_vpcmpeqb_v16i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqb_v16i1_v64i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpeqb (%rdi), %xmm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqb_v16i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi40:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi41:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi42:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %r13
; NoVLX-NEXT: pushq %r12
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: .Lcfi43:
; NoVLX-NEXT: .cfi_offset %rbx, -56
; NoVLX-NEXT: .Lcfi44:
; NoVLX-NEXT: .cfi_offset %r12, -48
; NoVLX-NEXT: .Lcfi45:
; NoVLX-NEXT: .cfi_offset %r13, -40
; NoVLX-NEXT: .Lcfi46:
; NoVLX-NEXT: .cfi_offset %r14, -32
; NoVLX-NEXT: .Lcfi47:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpeqb (%rdi), %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: leaq -40(%rbp), %rsp
; NoVLX-NEXT: popq %rbx
; NoVLX-NEXT: popq %r12
; NoVLX-NEXT: popq %r13
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <16 x i8>
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <16 x i8>
%2 = icmp eq <16 x i8> %0, %1
%3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
define zeroext i64 @test_masked_vpcmpeqb_v16i1_v64i1_mask(i16 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqb_v16i1_v64i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqb %xmm1, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqb_v16i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi48:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi49:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi50:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %r13
; NoVLX-NEXT: pushq %r12
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: .Lcfi51:
; NoVLX-NEXT: .cfi_offset %rbx, -56
; NoVLX-NEXT: .Lcfi52:
; NoVLX-NEXT: .cfi_offset %r12, -48
; NoVLX-NEXT: .Lcfi53:
; NoVLX-NEXT: .cfi_offset %r13, -40
; NoVLX-NEXT: .Lcfi54:
; NoVLX-NEXT: .cfi_offset %r14, -32
; NoVLX-NEXT: .Lcfi55:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: leaq -40(%rbp), %rsp
; NoVLX-NEXT: popq %rbx
; NoVLX-NEXT: popq %r12
; NoVLX-NEXT: popq %r13
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <16 x i8>
%1 = bitcast <2 x i64> %__b to <16 x i8>
%2 = icmp eq <16 x i8> %0, %1
%3 = bitcast i16 %__u to <16 x i1>
%4 = and <16 x i1> %2, %3
%5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
define zeroext i64 @test_masked_vpcmpeqb_v16i1_v64i1_mask_mem(i16 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqb_v16i1_v64i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqb (%rsi), %xmm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqb_v16i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi56:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi57:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi58:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %r13
; NoVLX-NEXT: pushq %r12
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: .Lcfi59:
; NoVLX-NEXT: .cfi_offset %rbx, -56
; NoVLX-NEXT: .Lcfi60:
; NoVLX-NEXT: .cfi_offset %r12, -48
; NoVLX-NEXT: .Lcfi61:
; NoVLX-NEXT: .cfi_offset %r13, -40
; NoVLX-NEXT: .Lcfi62:
; NoVLX-NEXT: .cfi_offset %r14, -32
; NoVLX-NEXT: .Lcfi63:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpeqb (%rsi), %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: leaq -40(%rbp), %rsp
; NoVLX-NEXT: popq %rbx
; NoVLX-NEXT: popq %r12
; NoVLX-NEXT: popq %r13
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <16 x i8>
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <16 x i8>
%2 = icmp eq <16 x i8> %0, %1
%3 = bitcast i16 %__u to <16 x i1>
%4 = and <16 x i1> %2, %3
%5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
define zeroext i64 @test_vpcmpeqb_v32i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqb_v32i1_v64i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpeqb %ymm1, %ymm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqb_v32i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi64:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi65:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi66:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %ecx
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax
; NoVLX-NEXT: shlq $32, %rax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <32 x i8>
%1 = bitcast <4 x i64> %__b to <32 x i8>
%2 = icmp eq <32 x i8> %0, %1
%3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
define zeroext i64 @test_vpcmpeqb_v32i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqb_v32i1_v64i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpeqb (%rdi), %ymm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqb_v32i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi67:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi68:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi69:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpeqb (%rdi), %ymm0, %ymm0
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %ecx
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax
; NoVLX-NEXT: shlq $32, %rax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <32 x i8>
%load = load <4 x i64>, <4 x i64>* %__b
%1 = bitcast <4 x i64> %load to <32 x i8>
%2 = icmp eq <32 x i8> %0, %1
%3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
define zeroext i64 @test_masked_vpcmpeqb_v32i1_v64i1_mask(i32 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqb_v32i1_v64i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqb %ymm1, %ymm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqb_v32i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi70:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi71:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi72:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $96, %rsp
; NoVLX-NEXT: movl %edi, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k1
; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k2
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vpmovdb %zmm2, %xmm2
; NoVLX-NEXT: vpternlogd $255, %zmm3, %zmm3, %zmm3 {%k2} {z}
; NoVLX-NEXT: vpmovdb %zmm3, %xmm3
; NoVLX-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpand %xmm3, %xmm1, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpand %xmm2, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %ecx
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax
; NoVLX-NEXT: shlq $32, %rax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <32 x i8>
%1 = bitcast <4 x i64> %__b to <32 x i8>
%2 = icmp eq <32 x i8> %0, %1
%3 = bitcast i32 %__u to <32 x i1>
%4 = and <32 x i1> %2, %3
%5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
define zeroext i64 @test_masked_vpcmpeqb_v32i1_v64i1_mask_mem(i32 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqb_v32i1_v64i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqb (%rsi), %ymm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqb_v32i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi73:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi74:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi75:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $96, %rsp
; NoVLX-NEXT: movl %edi, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k1
; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k2
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpmovdb %zmm1, %xmm1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k2} {z}
; NoVLX-NEXT: vpmovdb %zmm2, %xmm2
; NoVLX-NEXT: vpcmpeqb (%rsi), %ymm0, %ymm0
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm3
; NoVLX-NEXT: vpand %xmm2, %xmm3, %xmm2
; NoVLX-NEXT: vpmovsxbd %xmm2, %zmm2
; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2
; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %ecx
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax
; NoVLX-NEXT: shlq $32, %rax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <32 x i8>
%load = load <4 x i64>, <4 x i64>* %__b
%1 = bitcast <4 x i64> %load to <32 x i8>
%2 = icmp eq <32 x i8> %0, %1
%3 = bitcast i32 %__u to <32 x i1>
%4 = and <32 x i1> %2, %3
%5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
define zeroext i16 @test_vpcmpeqw_v8i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqw_v8i1_v16i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpeqw %xmm1, %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqw_v8i1_v16i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kunpckbw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <8 x i16>
%1 = bitcast <2 x i64> %__b to <8 x i16>
%2 = icmp eq <8 x i16> %0, %1
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%4 = bitcast <16 x i1> %3 to i16
ret i16 %4
}
define zeroext i16 @test_vpcmpeqw_v8i1_v16i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqw_v8i1_v16i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpeqw (%rdi), %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqw_v8i1_v16i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpcmpeqw (%rdi), %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kunpckbw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <8 x i16>
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <8 x i16>
%2 = icmp eq <8 x i16> %0, %1
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%4 = bitcast <16 x i1> %3 to i16
ret i16 %4
}
define zeroext i16 @test_masked_vpcmpeqw_v8i1_v16i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqw_v8i1_v16i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqw %xmm1, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqw_v8i1_v16i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kunpckbw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <8 x i16>
%1 = bitcast <2 x i64> %__b to <8 x i16>
%2 = icmp eq <8 x i16> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %2, %3
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%6 = bitcast <16 x i1> %5 to i16
ret i16 %6
}
define zeroext i16 @test_masked_vpcmpeqw_v8i1_v16i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqw_v8i1_v16i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqw (%rsi), %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqw_v8i1_v16i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpcmpeqw (%rsi), %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kunpckbw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <8 x i16>
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <8 x i16>
%2 = icmp eq <8 x i16> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %2, %3
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%6 = bitcast <16 x i1> %5 to i16
ret i16 %6
}
define zeroext i32 @test_vpcmpeqw_v8i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqw_v8i1_v32i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpeqw %xmm1, %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqw_v8i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi76:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi77:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi78:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <8 x i16>
%1 = bitcast <2 x i64> %__b to <8 x i16>
%2 = icmp eq <8 x i16> %0, %1
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
define zeroext i32 @test_vpcmpeqw_v8i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqw_v8i1_v32i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpeqw (%rdi), %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqw_v8i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi79:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi80:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi81:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpcmpeqw (%rdi), %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <8 x i16>
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <8 x i16>
%2 = icmp eq <8 x i16> %0, %1
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
define zeroext i32 @test_masked_vpcmpeqw_v8i1_v32i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqw_v8i1_v32i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqw %xmm1, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqw_v8i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi82:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi83:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi84:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <8 x i16>
%1 = bitcast <2 x i64> %__b to <8 x i16>
%2 = icmp eq <8 x i16> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %2, %3
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
define zeroext i32 @test_masked_vpcmpeqw_v8i1_v32i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqw_v8i1_v32i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqw (%rsi), %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqw_v8i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi85:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi86:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi87:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpcmpeqw (%rsi), %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <8 x i16>
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <8 x i16>
%2 = icmp eq <8 x i16> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %2, %3
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
define zeroext i64 @test_vpcmpeqw_v8i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqw_v8i1_v64i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpeqw %xmm1, %xmm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqw_v8i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi88:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi89:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi90:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <8 x i16>
%1 = bitcast <2 x i64> %__b to <8 x i16>
%2 = icmp eq <8 x i16> %0, %1
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
define zeroext i64 @test_vpcmpeqw_v8i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqw_v8i1_v64i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpeqw (%rdi), %xmm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqw_v8i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi91:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi92:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi93:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpeqw (%rdi), %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <8 x i16>
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <8 x i16>
%2 = icmp eq <8 x i16> %0, %1
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
define zeroext i64 @test_masked_vpcmpeqw_v8i1_v64i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqw_v8i1_v64i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqw %xmm1, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqw_v8i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi94:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi95:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi96:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <8 x i16>
%1 = bitcast <2 x i64> %__b to <8 x i16>
%2 = icmp eq <8 x i16> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %2, %3
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
define zeroext i64 @test_masked_vpcmpeqw_v8i1_v64i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqw_v8i1_v64i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqw (%rsi), %xmm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqw_v8i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi97:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi98:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi99:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpeqw (%rsi), %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <8 x i16>
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <8 x i16>
%2 = icmp eq <8 x i16> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %2, %3
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
define zeroext i32 @test_vpcmpeqw_v16i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqw_v16i1_v32i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpeqw %ymm1, %ymm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqw_v16i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi100:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi101:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi102:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %r13
; NoVLX-NEXT: pushq %r12
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: .Lcfi103:
; NoVLX-NEXT: .cfi_offset %rbx, -56
; NoVLX-NEXT: .Lcfi104:
; NoVLX-NEXT: .cfi_offset %r12, -48
; NoVLX-NEXT: .Lcfi105:
; NoVLX-NEXT: .cfi_offset %r13, -40
; NoVLX-NEXT: .Lcfi106:
; NoVLX-NEXT: .cfi_offset %r14, -32
; NoVLX-NEXT: .Lcfi107:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: leaq -40(%rbp), %rsp
; NoVLX-NEXT: popq %rbx
; NoVLX-NEXT: popq %r12
; NoVLX-NEXT: popq %r13
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <16 x i16>
%1 = bitcast <4 x i64> %__b to <16 x i16>
%2 = icmp eq <16 x i16> %0, %1
%3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
define zeroext i32 @test_vpcmpeqw_v16i1_v32i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqw_v16i1_v32i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpeqw (%rdi), %ymm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqw_v16i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi108:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi109:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi110:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %r13
; NoVLX-NEXT: pushq %r12
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: .Lcfi111:
; NoVLX-NEXT: .cfi_offset %rbx, -56
; NoVLX-NEXT: .Lcfi112:
; NoVLX-NEXT: .cfi_offset %r12, -48
; NoVLX-NEXT: .Lcfi113:
; NoVLX-NEXT: .cfi_offset %r13, -40
; NoVLX-NEXT: .Lcfi114:
; NoVLX-NEXT: .cfi_offset %r14, -32
; NoVLX-NEXT: .Lcfi115:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpeqw (%rdi), %ymm0, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: leaq -40(%rbp), %rsp
; NoVLX-NEXT: popq %rbx
; NoVLX-NEXT: popq %r12
; NoVLX-NEXT: popq %r13
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <16 x i16>
%load = load <4 x i64>, <4 x i64>* %__b
%1 = bitcast <4 x i64> %load to <16 x i16>
%2 = icmp eq <16 x i16> %0, %1
%3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
define zeroext i32 @test_masked_vpcmpeqw_v16i1_v32i1_mask(i16 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqw_v16i1_v32i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqw %ymm1, %ymm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqw_v16i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi116:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi117:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi118:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %r13
; NoVLX-NEXT: pushq %r12
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: .Lcfi119:
; NoVLX-NEXT: .cfi_offset %rbx, -56
; NoVLX-NEXT: .Lcfi120:
; NoVLX-NEXT: .cfi_offset %r12, -48
; NoVLX-NEXT: .Lcfi121:
; NoVLX-NEXT: .cfi_offset %r13, -40
; NoVLX-NEXT: .Lcfi122:
; NoVLX-NEXT: .cfi_offset %r14, -32
; NoVLX-NEXT: .Lcfi123:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: leaq -40(%rbp), %rsp
; NoVLX-NEXT: popq %rbx
; NoVLX-NEXT: popq %r12
; NoVLX-NEXT: popq %r13
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <16 x i16>
%1 = bitcast <4 x i64> %__b to <16 x i16>
%2 = icmp eq <16 x i16> %0, %1
%3 = bitcast i16 %__u to <16 x i1>
%4 = and <16 x i1> %2, %3
%5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
define zeroext i32 @test_masked_vpcmpeqw_v16i1_v32i1_mask_mem(i16 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqw_v16i1_v32i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqw (%rsi), %ymm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqw_v16i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi124:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi125:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi126:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %r13
; NoVLX-NEXT: pushq %r12
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: .Lcfi127:
; NoVLX-NEXT: .cfi_offset %rbx, -56
; NoVLX-NEXT: .Lcfi128:
; NoVLX-NEXT: .cfi_offset %r12, -48
; NoVLX-NEXT: .Lcfi129:
; NoVLX-NEXT: .cfi_offset %r13, -40
; NoVLX-NEXT: .Lcfi130:
; NoVLX-NEXT: .cfi_offset %r14, -32
; NoVLX-NEXT: .Lcfi131:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpeqw (%rsi), %ymm0, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: leaq -40(%rbp), %rsp
; NoVLX-NEXT: popq %rbx
; NoVLX-NEXT: popq %r12
; NoVLX-NEXT: popq %r13
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <16 x i16>
%load = load <4 x i64>, <4 x i64>* %__b
%1 = bitcast <4 x i64> %load to <16 x i16>
%2 = icmp eq <16 x i16> %0, %1
%3 = bitcast i16 %__u to <16 x i1>
%4 = and <16 x i1> %2, %3
%5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
define zeroext i64 @test_vpcmpeqw_v16i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqw_v16i1_v64i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpeqw %ymm1, %ymm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqw_v16i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi132:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi133:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi134:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %r13
; NoVLX-NEXT: pushq %r12
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: .Lcfi135:
; NoVLX-NEXT: .cfi_offset %rbx, -56
; NoVLX-NEXT: .Lcfi136:
; NoVLX-NEXT: .cfi_offset %r12, -48
; NoVLX-NEXT: .Lcfi137:
; NoVLX-NEXT: .cfi_offset %r13, -40
; NoVLX-NEXT: .Lcfi138:
; NoVLX-NEXT: .cfi_offset %r14, -32
; NoVLX-NEXT: .Lcfi139:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: leaq -40(%rbp), %rsp
; NoVLX-NEXT: popq %rbx
; NoVLX-NEXT: popq %r12
; NoVLX-NEXT: popq %r13
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <16 x i16>
%1 = bitcast <4 x i64> %__b to <16 x i16>
%2 = icmp eq <16 x i16> %0, %1
%3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
define zeroext i64 @test_vpcmpeqw_v16i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqw_v16i1_v64i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpeqw (%rdi), %ymm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqw_v16i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi140:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi141:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi142:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %r13
; NoVLX-NEXT: pushq %r12
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: .Lcfi143:
; NoVLX-NEXT: .cfi_offset %rbx, -56
; NoVLX-NEXT: .Lcfi144:
; NoVLX-NEXT: .cfi_offset %r12, -48
; NoVLX-NEXT: .Lcfi145:
; NoVLX-NEXT: .cfi_offset %r13, -40
; NoVLX-NEXT: .Lcfi146:
; NoVLX-NEXT: .cfi_offset %r14, -32
; NoVLX-NEXT: .Lcfi147:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpeqw (%rdi), %ymm0, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: leaq -40(%rbp), %rsp
; NoVLX-NEXT: popq %rbx
; NoVLX-NEXT: popq %r12
; NoVLX-NEXT: popq %r13
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <16 x i16>
%load = load <4 x i64>, <4 x i64>* %__b
%1 = bitcast <4 x i64> %load to <16 x i16>
%2 = icmp eq <16 x i16> %0, %1
%3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
define zeroext i64 @test_masked_vpcmpeqw_v16i1_v64i1_mask(i16 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqw_v16i1_v64i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqw %ymm1, %ymm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqw_v16i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi148:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi149:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi150:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %r13
; NoVLX-NEXT: pushq %r12
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: .Lcfi151:
; NoVLX-NEXT: .cfi_offset %rbx, -56
; NoVLX-NEXT: .Lcfi152:
; NoVLX-NEXT: .cfi_offset %r12, -48
; NoVLX-NEXT: .Lcfi153:
; NoVLX-NEXT: .cfi_offset %r13, -40
; NoVLX-NEXT: .Lcfi154:
; NoVLX-NEXT: .cfi_offset %r14, -32
; NoVLX-NEXT: .Lcfi155:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: leaq -40(%rbp), %rsp
; NoVLX-NEXT: popq %rbx
; NoVLX-NEXT: popq %r12
; NoVLX-NEXT: popq %r13
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <16 x i16>
%1 = bitcast <4 x i64> %__b to <16 x i16>
%2 = icmp eq <16 x i16> %0, %1
%3 = bitcast i16 %__u to <16 x i1>
%4 = and <16 x i1> %2, %3
%5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
define zeroext i64 @test_masked_vpcmpeqw_v16i1_v64i1_mask_mem(i16 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqw_v16i1_v64i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqw (%rsi), %ymm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqw_v16i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi156:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi157:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi158:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %r13
; NoVLX-NEXT: pushq %r12
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: .Lcfi159:
; NoVLX-NEXT: .cfi_offset %rbx, -56
; NoVLX-NEXT: .Lcfi160:
; NoVLX-NEXT: .cfi_offset %r12, -48
; NoVLX-NEXT: .Lcfi161:
; NoVLX-NEXT: .cfi_offset %r13, -40
; NoVLX-NEXT: .Lcfi162:
; NoVLX-NEXT: .cfi_offset %r14, -32
; NoVLX-NEXT: .Lcfi163:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpeqw (%rsi), %ymm0, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: leaq -40(%rbp), %rsp
; NoVLX-NEXT: popq %rbx
; NoVLX-NEXT: popq %r12
; NoVLX-NEXT: popq %r13
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <16 x i16>
%load = load <4 x i64>, <4 x i64>* %__b
%1 = bitcast <4 x i64> %load to <16 x i16>
%2 = icmp eq <16 x i16> %0, %1
%3 = bitcast i16 %__u to <16 x i1>
%4 = and <16 x i1> %2, %3
%5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
define zeroext i64 @test_vpcmpeqw_v32i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqw_v32i1_v64i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpeqw %zmm1, %zmm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqw_v32i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi164:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi165:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi166:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm3
; NoVLX-NEXT: vmovq %xmm3, %rax
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: movq %rax, %rdx
; NoVLX-NEXT: vmovd %eax, %xmm2
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm5
; NoVLX-NEXT: vextracti32x4 $2, %zmm1, %xmm8
; NoVLX-NEXT: vextracti32x4 $3, %zmm1, %xmm4
; NoVLX-NEXT: vextracti128 $1, %ymm1, %xmm6
; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm7
; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm2
; NoVLX-NEXT: shrq $32, %rdx
; NoVLX-NEXT: vpinsrw $2, %edx, %xmm5, %xmm5
; NoVLX-NEXT: vpextrq $1, %xmm3, %rax
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm3
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: vmovq %xmm0, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm3
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vmovd %ecx, %xmm5
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm5, %xmm5
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm5, %xmm5
; NoVLX-NEXT: vpextrq $1, %xmm0, %rax
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm0
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: vmovq %xmm2, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vmovd %ecx, %xmm5
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm5, %xmm5
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm5, %xmm5
; NoVLX-NEXT: vpextrq $1, %xmm2, %rax
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm2
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: vmovq %xmm7, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm5
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vmovd %ecx, %xmm2
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
; NoVLX-NEXT: vpextrq $1, %xmm7, %rax
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: vmovq %xmm6, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm7
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vmovd %ecx, %xmm2
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
; NoVLX-NEXT: vpextrq $1, %xmm6, %rax
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: vmovq %xmm1, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm6
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vmovd %ecx, %xmm2
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
; NoVLX-NEXT: vpextrq $1, %xmm1, %rax
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm1
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1
; NoVLX-NEXT: vmovq %xmm4, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vmovd %ecx, %xmm2
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
; NoVLX-NEXT: vpextrq $1, %xmm4, %rax
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: vmovq %xmm8, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vmovd %ecx, %xmm4
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4
; NoVLX-NEXT: vpextrq $1, %xmm8, %rax
; NoVLX-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0
; NoVLX-NEXT: vinserti128 $1, %xmm5, %ymm7, %ymm3
; NoVLX-NEXT: vinserti128 $1, %xmm6, %ymm1, %ymm1
; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm1
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm3, %ymm1
; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $2, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $3, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $6, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $9, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $10, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $13, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $14, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm0
; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %ecx
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax
; NoVLX-NEXT: shlq $32, %rax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <32 x i16>
%1 = bitcast <8 x i64> %__b to <32 x i16>
%2 = icmp eq <32 x i16> %0, %1
%3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
define zeroext i64 @test_vpcmpeqw_v32i1_v64i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqw_v32i1_v64i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpeqw (%rdi), %zmm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqw_v32i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi167:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi168:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi169:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm2
; NoVLX-NEXT: vmovq %xmm2, %rax
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: movq %rax, %rdx
; NoVLX-NEXT: vmovd %eax, %xmm1
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm1, %xmm3
; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm1
; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm4
; NoVLX-NEXT: shrq $32, %rdx
; NoVLX-NEXT: vpinsrw $2, %edx, %xmm3, %xmm3
; NoVLX-NEXT: vpextrq $1, %xmm2, %rax
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm2
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: vmovq %xmm0, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vmovd %ecx, %xmm3
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
; NoVLX-NEXT: vpextrq $1, %xmm0, %rax
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm0
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: vmovq %xmm4, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vmovd %ecx, %xmm3
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
; NoVLX-NEXT: vpextrq $1, %xmm4, %rax
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: vmovq %xmm1, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm3
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vmovd %ecx, %xmm4
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4
; NoVLX-NEXT: vpextrq $1, %xmm1, %rax
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm1
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1
; NoVLX-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vinserti128 $1, %xmm3, %ymm1, %ymm1
; NoVLX-NEXT: vpcmpeqw 32(%rdi), %ymm1, %ymm1
; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %eax, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $2, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $3, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $6, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $9, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $10, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $13, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $14, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpcmpeqw (%rdi), %ymm0, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm0
; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %ecx
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax
; NoVLX-NEXT: shlq $32, %rax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <32 x i16>
%load = load <8 x i64>, <8 x i64>* %__b
%1 = bitcast <8 x i64> %load to <32 x i16>
%2 = icmp eq <32 x i16> %0, %1
%3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
define zeroext i64 @test_masked_vpcmpeqw_v32i1_v64i1_mask(i32 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqw_v32i1_v64i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqw %zmm1, %zmm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqw_v32i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi170:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi171:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi172:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $96, %rsp
; NoVLX-NEXT: movl %edi, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm2
; NoVLX-NEXT: vmovq %xmm2, %rax
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: movq %rax, %rdx
; NoVLX-NEXT: vmovd %eax, %xmm3
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm4
; NoVLX-NEXT: vextracti128 $1, %ymm1, %xmm8
; NoVLX-NEXT: vextracti32x4 $2, %zmm1, %xmm5
; NoVLX-NEXT: vextracti32x4 $3, %zmm1, %xmm7
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm6
; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm3
; NoVLX-NEXT: shrq $32, %rdx
; NoVLX-NEXT: vpinsrw $2, %edx, %xmm4, %xmm4
; NoVLX-NEXT: vpextrq $1, %xmm2, %rax
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm2
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: vmovq %xmm3, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm9
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vmovd %ecx, %xmm4
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4
; NoVLX-NEXT: vpextrq $1, %xmm3, %rax
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm3
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: vmovq %xmm6, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm4
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vmovd %ecx, %xmm3
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
; NoVLX-NEXT: vpextrq $1, %xmm6, %rax
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: vmovq %xmm0, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm6
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vmovd %ecx, %xmm3
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
; NoVLX-NEXT: vpextrq $1, %xmm0, %rax
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm0
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: vmovq %xmm7, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vmovd %ecx, %xmm3
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
; NoVLX-NEXT: vpextrq $1, %xmm7, %rax
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: vmovq %xmm5, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm7
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vmovd %ecx, %xmm3
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
; NoVLX-NEXT: vpextrq $1, %xmm5, %rax
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: vmovq %xmm8, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm3
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vmovd %ecx, %xmm5
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm5, %xmm5
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm5, %xmm5
; NoVLX-NEXT: vpextrq $1, %xmm8, %rax
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm5
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm5, %xmm5
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm5, %xmm5
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm5, %xmm5
; NoVLX-NEXT: vmovq %xmm1, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm5, %xmm5
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vmovd %ecx, %xmm2
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
; NoVLX-NEXT: vpextrq $1, %xmm1, %rax
; NoVLX-NEXT: vinserti128 $1, %xmm9, %ymm4, %ymm1
; NoVLX-NEXT: vinserti128 $1, %xmm6, %ymm0, %ymm4
; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k1
; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k2
; NoVLX-NEXT: vinserti128 $1, %xmm7, %ymm3, %ymm3
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpcmpeqw %ymm3, %ymm1, %ymm3
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k2} {z}
; NoVLX-NEXT: vpmovdb %zmm1, %xmm1
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2
; NoVLX-NEXT: vinserti128 $1, %xmm5, %ymm2, %ymm2
; NoVLX-NEXT: vpcmpeqw %ymm2, %ymm4, %ymm2
; NoVLX-NEXT: vpmovsxwd %ymm2, %zmm2
; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2
; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm2
; NoVLX-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm2, %xmm2
; NoVLX-NEXT: vpmovsxwd %ymm3, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm3
; NoVLX-NEXT: vptestmd %zmm3, %zmm3, %k0
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm3
; NoVLX-NEXT: vpinsrb $1, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $2, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $3, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $6, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $9, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $10, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $13, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $14, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm3, %xmm3
; NoVLX-NEXT: vpand %xmm1, %xmm3, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpand %xmm0, %xmm2, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %ecx
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax
; NoVLX-NEXT: shlq $32, %rax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <32 x i16>
%1 = bitcast <8 x i64> %__b to <32 x i16>
%2 = icmp eq <32 x i16> %0, %1
%3 = bitcast i32 %__u to <32 x i1>
%4 = and <32 x i1> %2, %3
%5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
define zeroext i64 @test_masked_vpcmpeqw_v32i1_v64i1_mask_mem(i32 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqw_v32i1_v64i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqw (%rsi), %zmm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqw_v32i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi173:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi174:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi175:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $96, %rsp
; NoVLX-NEXT: movl %edi, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm1
; NoVLX-NEXT: vmovq %xmm1, %rax
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: movq %rax, %rdx
; NoVLX-NEXT: vmovd %eax, %xmm2
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm3
; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm4
; NoVLX-NEXT: shrq $32, %rdx
; NoVLX-NEXT: vpinsrw $2, %edx, %xmm2, %xmm2
; NoVLX-NEXT: vpextrq $1, %xmm1, %rax
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm1
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1
; NoVLX-NEXT: vmovq %xmm4, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vmovd %ecx, %xmm2
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
; NoVLX-NEXT: vpextrq $1, %xmm4, %rax
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: vmovq %xmm3, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vmovd %ecx, %xmm4
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4
; NoVLX-NEXT: vpextrq $1, %xmm3, %rax
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm3
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: vmovq %xmm0, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm4
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vmovd %ecx, %xmm3
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
; NoVLX-NEXT: vpextrq $1, %xmm0, %rax
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm0
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm5
; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k1
; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k2
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm3
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k2} {z}
; NoVLX-NEXT: vpmovdb %zmm1, %xmm1
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm5, %xmm2
; NoVLX-NEXT: vinserti128 $1, %xmm4, %ymm2, %ymm2
; NoVLX-NEXT: vpcmpeqw (%rsi), %ymm2, %ymm2
; NoVLX-NEXT: vpmovsxwd %ymm2, %zmm2
; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2
; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %eax, %xmm2
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm2, %xmm2
; NoVLX-NEXT: vpcmpeqw 32(%rsi), %ymm3, %ymm3
; NoVLX-NEXT: vpmovsxwd %ymm3, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm3
; NoVLX-NEXT: vptestmd %zmm3, %zmm3, %k0
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm3
; NoVLX-NEXT: vpinsrb $1, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $2, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $3, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $6, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $9, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $10, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $13, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $14, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm3, %xmm3
; NoVLX-NEXT: vpand %xmm1, %xmm3, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpand %xmm0, %xmm2, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %ecx
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax
; NoVLX-NEXT: shlq $32, %rax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <32 x i16>
%load = load <8 x i64>, <8 x i64>* %__b
%1 = bitcast <8 x i64> %load to <32 x i16>
%2 = icmp eq <32 x i16> %0, %1
%3 = bitcast i32 %__u to <32 x i1>
%4 = and <32 x i1> %2, %3
%5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
define zeroext i8 @test_vpcmpeqd_v4i1_v8i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqd_v4i1_v8i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpeqd %xmm1, %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqd_v4i1_v8i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%1 = bitcast <2 x i64> %__b to <4 x i32>
%2 = icmp eq <4 x i32> %0, %1
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <8 x i1> %3 to i8
ret i8 %4
}
define zeroext i8 @test_vpcmpeqd_v4i1_v8i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqd_v4i1_v8i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpeqd (%rdi), %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqd_v4i1_v8i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpcmpeqd (%rdi), %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <4 x i32>
%2 = icmp eq <4 x i32> %0, %1
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <8 x i1> %3 to i8
ret i8 %4
}
define zeroext i8 @test_masked_vpcmpeqd_v4i1_v8i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqd_v4i1_v8i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqd %xmm1, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v8i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
; NoVLX-NEXT: kshiftrw $15, %k3, %k3
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k3, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%1 = bitcast <2 x i64> %__b to <4 x i32>
%2 = icmp eq <4 x i32> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%4 = and <4 x i1> %2, %extract.i
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <8 x i1> %5 to i8
ret i8 %6
}
define zeroext i8 @test_masked_vpcmpeqd_v4i1_v8i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqd_v4i1_v8i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqd (%rsi), %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v8i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpcmpeqd (%rsi), %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
; NoVLX-NEXT: kshiftrw $15, %k3, %k3
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k3, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <4 x i32>
%2 = icmp eq <4 x i32> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%4 = and <4 x i1> %2, %extract.i
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <8 x i1> %5 to i8
ret i8 %6
}
define zeroext i8 @test_vpcmpeqd_v4i1_v8i1_mask_mem_b(<2 x i64> %__a, i32* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqd_v4i1_v8i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpeqd (%rdi){1to4}, %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqd_v4i1_v8i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%load = load i32, i32* %__b
%vec = insertelement <4 x i32> undef, i32 %load, i32 0
%1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
%2 = icmp eq <4 x i32> %0, %1
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <8 x i1> %3 to i8
ret i8 %4
}
define zeroext i8 @test_masked_vpcmpeqd_v4i1_v8i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i32* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqd_v4i1_v8i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqd (%rsi){1to4}, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v8i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
; NoVLX-NEXT: kshiftrw $15, %k3, %k3
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k3, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%load = load i32, i32* %__b
%vec = insertelement <4 x i32> undef, i32 %load, i32 0
%1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
%2 = icmp eq <4 x i32> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%4 = and <4 x i1> %extract.i, %2
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <8 x i1> %5 to i8
ret i8 %6
}
define zeroext i16 @test_vpcmpeqd_v4i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqd_v4i1_v16i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpeqd %xmm1, %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqd_v4i1_v16i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%1 = bitcast <2 x i64> %__b to <4 x i32>
%2 = icmp eq <4 x i32> %0, %1
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <16 x i1> %3 to i16
ret i16 %4
}
define zeroext i16 @test_vpcmpeqd_v4i1_v16i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqd_v4i1_v16i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpeqd (%rdi), %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqd_v4i1_v16i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpcmpeqd (%rdi), %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <4 x i32>
%2 = icmp eq <4 x i32> %0, %1
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <16 x i1> %3 to i16
ret i16 %4
}
define zeroext i16 @test_masked_vpcmpeqd_v4i1_v16i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqd_v4i1_v16i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqd %xmm1, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v16i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
; NoVLX-NEXT: kshiftrw $15, %k3, %k3
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k3, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%1 = bitcast <2 x i64> %__b to <4 x i32>
%2 = icmp eq <4 x i32> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%4 = and <4 x i1> %2, %extract.i
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <16 x i1> %5 to i16
ret i16 %6
}
define zeroext i16 @test_masked_vpcmpeqd_v4i1_v16i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqd_v4i1_v16i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqd (%rsi), %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v16i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpcmpeqd (%rsi), %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
; NoVLX-NEXT: kshiftrw $15, %k3, %k3
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k3, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <4 x i32>
%2 = icmp eq <4 x i32> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%4 = and <4 x i1> %2, %extract.i
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <16 x i1> %5 to i16
ret i16 %6
}
define zeroext i16 @test_vpcmpeqd_v4i1_v16i1_mask_mem_b(<2 x i64> %__a, i32* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqd_v4i1_v16i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpeqd (%rdi){1to4}, %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqd_v4i1_v16i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%load = load i32, i32* %__b
%vec = insertelement <4 x i32> undef, i32 %load, i32 0
%1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
%2 = icmp eq <4 x i32> %0, %1
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <16 x i1> %3 to i16
ret i16 %4
}
define zeroext i16 @test_masked_vpcmpeqd_v4i1_v16i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i32* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqd_v4i1_v16i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqd (%rsi){1to4}, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v16i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
; NoVLX-NEXT: kshiftrw $15, %k3, %k3
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k3, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%load = load i32, i32* %__b
%vec = insertelement <4 x i32> undef, i32 %load, i32 0
%1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
%2 = icmp eq <4 x i32> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%4 = and <4 x i1> %extract.i, %2
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <16 x i1> %5 to i16
ret i16 %6
}
define zeroext i32 @test_vpcmpeqd_v4i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqd_v4i1_v32i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpeqd %xmm1, %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqd_v4i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi176:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi177:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi178:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%1 = bitcast <2 x i64> %__b to <4 x i32>
%2 = icmp eq <4 x i32> %0, %1
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
define zeroext i32 @test_vpcmpeqd_v4i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqd_v4i1_v32i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpeqd (%rdi), %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqd_v4i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi179:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi180:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi181:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpcmpeqd (%rdi), %xmm0, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <4 x i32>
%2 = icmp eq <4 x i32> %0, %1
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
define zeroext i32 @test_masked_vpcmpeqd_v4i1_v32i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqd_v4i1_v32i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqd %xmm1, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi182:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi183:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi184:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
; NoVLX-NEXT: kshiftrw $15, %k3, %k3
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k3, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%1 = bitcast <2 x i64> %__b to <4 x i32>
%2 = icmp eq <4 x i32> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%4 = and <4 x i1> %2, %extract.i
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
define zeroext i32 @test_masked_vpcmpeqd_v4i1_v32i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqd_v4i1_v32i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqd (%rsi), %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi185:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi186:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi187:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpcmpeqd (%rsi), %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
; NoVLX-NEXT: kshiftrw $15, %k3, %k3
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k3, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <4 x i32>
%2 = icmp eq <4 x i32> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%4 = and <4 x i1> %2, %extract.i
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
define zeroext i32 @test_vpcmpeqd_v4i1_v32i1_mask_mem_b(<2 x i64> %__a, i32* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqd_v4i1_v32i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpeqd (%rdi){1to4}, %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqd_v4i1_v32i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi188:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi189:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi190:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%load = load i32, i32* %__b
%vec = insertelement <4 x i32> undef, i32 %load, i32 0
%1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
%2 = icmp eq <4 x i32> %0, %1
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
define zeroext i32 @test_masked_vpcmpeqd_v4i1_v32i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i32* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqd_v4i1_v32i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqd (%rsi){1to4}, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v32i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi191:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi192:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi193:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
; NoVLX-NEXT: kshiftrw $15, %k3, %k3
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k3, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%load = load i32, i32* %__b
%vec = insertelement <4 x i32> undef, i32 %load, i32 0
%1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
%2 = icmp eq <4 x i32> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%4 = and <4 x i1> %extract.i, %2
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
define zeroext i64 @test_vpcmpeqd_v4i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqd_v4i1_v64i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpeqd %xmm1, %xmm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqd_v4i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi194:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi195:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi196:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%1 = bitcast <2 x i64> %__b to <4 x i32>
%2 = icmp eq <4 x i32> %0, %1
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
define zeroext i64 @test_vpcmpeqd_v4i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqd_v4i1_v64i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpeqd (%rdi), %xmm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqd_v4i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi197:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi198:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi199:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpeqd (%rdi), %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <4 x i32>
%2 = icmp eq <4 x i32> %0, %1
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
define zeroext i64 @test_masked_vpcmpeqd_v4i1_v64i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqd_v4i1_v64i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqd %xmm1, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi200:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi201:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi202:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
; NoVLX-NEXT: kshiftrw $15, %k3, %k3
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k3, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%1 = bitcast <2 x i64> %__b to <4 x i32>
%2 = icmp eq <4 x i32> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%4 = and <4 x i1> %2, %extract.i
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
define zeroext i64 @test_masked_vpcmpeqd_v4i1_v64i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqd_v4i1_v64i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqd (%rsi), %xmm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi203:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi204:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi205:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpeqd (%rsi), %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
; NoVLX-NEXT: kshiftrw $15, %k3, %k3
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k3, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <4 x i32>
%2 = icmp eq <4 x i32> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%4 = and <4 x i1> %2, %extract.i
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
define zeroext i64 @test_vpcmpeqd_v4i1_v64i1_mask_mem_b(<2 x i64> %__a, i32* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqd_v4i1_v64i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpeqd (%rdi){1to4}, %xmm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqd_v4i1_v64i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi206:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi207:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi208:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%load = load i32, i32* %__b
%vec = insertelement <4 x i32> undef, i32 %load, i32 0
%1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
%2 = icmp eq <4 x i32> %0, %1
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
define zeroext i64 @test_masked_vpcmpeqd_v4i1_v64i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i32* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqd_v4i1_v64i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqd (%rsi){1to4}, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v64i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi209:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi210:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi211:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
; NoVLX-NEXT: kshiftrw $15, %k3, %k3
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k3, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%load = load i32, i32* %__b
%vec = insertelement <4 x i32> undef, i32 %load, i32 0
%1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
%2 = icmp eq <4 x i32> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%4 = and <4 x i1> %extract.i, %2
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
define zeroext i16 @test_vpcmpeqd_v8i1_v16i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqd_v8i1_v16i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpeqd %ymm1, %ymm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqd_v8i1_v16i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: # kill: %YMM1<def> %YMM1<kill> %ZMM1<def>
; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $8, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
%1 = bitcast <4 x i64> %__b to <8 x i32>
%2 = icmp eq <8 x i32> %0, %1
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%4 = bitcast <16 x i1> %3 to i16
ret i16 %4
}
define zeroext i16 @test_vpcmpeqd_v8i1_v16i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqd_v8i1_v16i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpeqd (%rdi), %ymm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqd_v8i1_v16i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $8, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
%load = load <4 x i64>, <4 x i64>* %__b
%1 = bitcast <4 x i64> %load to <8 x i32>
%2 = icmp eq <8 x i32> %0, %1
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%4 = bitcast <16 x i1> %3 to i16
ret i16 %4
}
define zeroext i16 @test_masked_vpcmpeqd_v8i1_v16i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqd_v8i1_v16i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqd %ymm1, %ymm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqd_v8i1_v16i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: # kill: %YMM1<def> %YMM1<kill> %ZMM1<def>
; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $8, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
%1 = bitcast <4 x i64> %__b to <8 x i32>
%2 = icmp eq <8 x i32> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %2, %3
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%6 = bitcast <16 x i1> %5 to i16
ret i16 %6
}
define zeroext i16 @test_masked_vpcmpeqd_v8i1_v16i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqd_v8i1_v16i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqd (%rsi), %ymm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqd_v8i1_v16i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $8, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
%load = load <4 x i64>, <4 x i64>* %__b
%1 = bitcast <4 x i64> %load to <8 x i32>
%2 = icmp eq <8 x i32> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %2, %3
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%6 = bitcast <16 x i1> %5 to i16
ret i16 %6
}
define zeroext i16 @test_vpcmpeqd_v8i1_v16i1_mask_mem_b(<4 x i64> %__a, i32* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqd_v8i1_v16i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpeqd (%rdi){1to8}, %ymm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqd_v8i1_v16i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; NoVLX-NEXT: vpbroadcastd (%rdi), %ymm1
; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $8, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
%load = load i32, i32* %__b
%vec = insertelement <8 x i32> undef, i32 %load, i32 0
%1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
%2 = icmp eq <8 x i32> %0, %1
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%4 = bitcast <16 x i1> %3 to i16
ret i16 %4
}
define zeroext i16 @test_masked_vpcmpeqd_v8i1_v16i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i32* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqd_v8i1_v16i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqd (%rsi){1to8}, %ymm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqd_v8i1_v16i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; NoVLX-NEXT: vpbroadcastd (%rsi), %ymm1
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $8, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
%load = load i32, i32* %__b
%vec = insertelement <8 x i32> undef, i32 %load, i32 0
%1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
%2 = icmp eq <8 x i32> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %3, %2
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%6 = bitcast <16 x i1> %5 to i16
ret i16 %6
}
define zeroext i32 @test_vpcmpeqd_v8i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqd_v8i1_v32i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpeqd %ymm1, %ymm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqd_v8i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi212:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi213:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi214:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: # kill: %YMM1<def> %YMM1<kill> %ZMM1<def>
; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
%1 = bitcast <4 x i64> %__b to <8 x i32>
%2 = icmp eq <8 x i32> %0, %1
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
define zeroext i32 @test_vpcmpeqd_v8i1_v32i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqd_v8i1_v32i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpeqd (%rdi), %ymm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqd_v8i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi215:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi216:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi217:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
%load = load <4 x i64>, <4 x i64>* %__b
%1 = bitcast <4 x i64> %load to <8 x i32>
%2 = icmp eq <8 x i32> %0, %1
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
define zeroext i32 @test_masked_vpcmpeqd_v8i1_v32i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqd_v8i1_v32i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqd %ymm1, %ymm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqd_v8i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi218:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi219:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi220:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: # kill: %YMM1<def> %YMM1<kill> %ZMM1<def>
; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
%1 = bitcast <4 x i64> %__b to <8 x i32>
%2 = icmp eq <8 x i32> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %2, %3
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
define zeroext i32 @test_masked_vpcmpeqd_v8i1_v32i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqd_v8i1_v32i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqd (%rsi), %ymm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqd_v8i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi221:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi222:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi223:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
%load = load <4 x i64>, <4 x i64>* %__b
%1 = bitcast <4 x i64> %load to <8 x i32>
%2 = icmp eq <8 x i32> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %2, %3
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
define zeroext i32 @test_vpcmpeqd_v8i1_v32i1_mask_mem_b(<4 x i64> %__a, i32* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqd_v8i1_v32i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpeqd (%rdi){1to8}, %ymm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqd_v8i1_v32i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi224:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi225:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi226:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; NoVLX-NEXT: vpbroadcastd (%rdi), %ymm1
; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
%load = load i32, i32* %__b
%vec = insertelement <8 x i32> undef, i32 %load, i32 0
%1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
%2 = icmp eq <8 x i32> %0, %1
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
define zeroext i32 @test_masked_vpcmpeqd_v8i1_v32i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i32* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqd_v8i1_v32i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqd (%rsi){1to8}, %ymm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqd_v8i1_v32i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi227:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi228:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi229:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; NoVLX-NEXT: vpbroadcastd (%rsi), %ymm1
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
%load = load i32, i32* %__b
%vec = insertelement <8 x i32> undef, i32 %load, i32 0
%1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
%2 = icmp eq <8 x i32> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %3, %2
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
define zeroext i64 @test_vpcmpeqd_v8i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqd_v8i1_v64i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpeqd %ymm1, %ymm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqd_v8i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi230:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi231:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi232:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: # kill: %YMM1<def> %YMM1<kill> %ZMM1<def>
; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
%1 = bitcast <4 x i64> %__b to <8 x i32>
%2 = icmp eq <8 x i32> %0, %1
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
define zeroext i64 @test_vpcmpeqd_v8i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqd_v8i1_v64i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpeqd (%rdi), %ymm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqd_v8i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi233:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi234:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi235:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
%load = load <4 x i64>, <4 x i64>* %__b
%1 = bitcast <4 x i64> %load to <8 x i32>
%2 = icmp eq <8 x i32> %0, %1
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
define zeroext i64 @test_masked_vpcmpeqd_v8i1_v64i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqd_v8i1_v64i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqd %ymm1, %ymm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqd_v8i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi236:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi237:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi238:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: # kill: %YMM1<def> %YMM1<kill> %ZMM1<def>
; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
%1 = bitcast <4 x i64> %__b to <8 x i32>
%2 = icmp eq <8 x i32> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %2, %3
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
define zeroext i64 @test_masked_vpcmpeqd_v8i1_v64i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqd_v8i1_v64i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqd (%rsi), %ymm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqd_v8i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi239:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi240:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi241:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
%load = load <4 x i64>, <4 x i64>* %__b
%1 = bitcast <4 x i64> %load to <8 x i32>
%2 = icmp eq <8 x i32> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %2, %3
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
define zeroext i64 @test_vpcmpeqd_v8i1_v64i1_mask_mem_b(<4 x i64> %__a, i32* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqd_v8i1_v64i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpeqd (%rdi){1to8}, %ymm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqd_v8i1_v64i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi242:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi243:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi244:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; NoVLX-NEXT: vpbroadcastd (%rdi), %ymm1
; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
%load = load i32, i32* %__b
%vec = insertelement <8 x i32> undef, i32 %load, i32 0
%1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
%2 = icmp eq <8 x i32> %0, %1
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
define zeroext i64 @test_masked_vpcmpeqd_v8i1_v64i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i32* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqd_v8i1_v64i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqd (%rsi){1to8}, %ymm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqd_v8i1_v64i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi245:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi246:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi247:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; NoVLX-NEXT: vpbroadcastd (%rsi), %ymm1
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
%load = load i32, i32* %__b
%vec = insertelement <8 x i32> undef, i32 %load, i32 0
%1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
%2 = icmp eq <8 x i32> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %3, %2
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
define zeroext i32 @test_vpcmpeqd_v16i1_v32i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqd_v16i1_v32i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqd_v16i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi248:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi249:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi250:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %r13
; NoVLX-NEXT: pushq %r12
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: .Lcfi251:
; NoVLX-NEXT: .cfi_offset %rbx, -56
; NoVLX-NEXT: .Lcfi252:
; NoVLX-NEXT: .cfi_offset %r12, -48
; NoVLX-NEXT: .Lcfi253:
; NoVLX-NEXT: .cfi_offset %r13, -40
; NoVLX-NEXT: .Lcfi254:
; NoVLX-NEXT: .cfi_offset %r14, -32
; NoVLX-NEXT: .Lcfi255:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: leaq -40(%rbp), %rsp
; NoVLX-NEXT: popq %rbx
; NoVLX-NEXT: popq %r12
; NoVLX-NEXT: popq %r13
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x i32>
%1 = bitcast <8 x i64> %__b to <16 x i32>
%2 = icmp eq <16 x i32> %0, %1
%3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
define zeroext i32 @test_vpcmpeqd_v16i1_v32i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqd_v16i1_v32i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpeqd (%rdi), %zmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqd_v16i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi256:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi257:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi258:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %r13
; NoVLX-NEXT: pushq %r12
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: .Lcfi259:
; NoVLX-NEXT: .cfi_offset %rbx, -56
; NoVLX-NEXT: .Lcfi260:
; NoVLX-NEXT: .cfi_offset %r12, -48
; NoVLX-NEXT: .Lcfi261:
; NoVLX-NEXT: .cfi_offset %r13, -40
; NoVLX-NEXT: .Lcfi262:
; NoVLX-NEXT: .cfi_offset %r14, -32
; NoVLX-NEXT: .Lcfi263:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpeqd (%rdi), %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: leaq -40(%rbp), %rsp
; NoVLX-NEXT: popq %rbx
; NoVLX-NEXT: popq %r12
; NoVLX-NEXT: popq %r13
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x i32>
%load = load <8 x i64>, <8 x i64>* %__b
%1 = bitcast <8 x i64> %load to <16 x i32>
%2 = icmp eq <16 x i32> %0, %1
%3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
define zeroext i32 @test_masked_vpcmpeqd_v16i1_v32i1_mask(i16 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqd_v16i1_v32i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqd_v16i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi264:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi265:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi266:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %r13
; NoVLX-NEXT: pushq %r12
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: .Lcfi267:
; NoVLX-NEXT: .cfi_offset %rbx, -56
; NoVLX-NEXT: .Lcfi268:
; NoVLX-NEXT: .cfi_offset %r12, -48
; NoVLX-NEXT: .Lcfi269:
; NoVLX-NEXT: .cfi_offset %r13, -40
; NoVLX-NEXT: .Lcfi270:
; NoVLX-NEXT: .cfi_offset %r14, -32
; NoVLX-NEXT: .Lcfi271:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: leaq -40(%rbp), %rsp
; NoVLX-NEXT: popq %rbx
; NoVLX-NEXT: popq %r12
; NoVLX-NEXT: popq %r13
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x i32>
%1 = bitcast <8 x i64> %__b to <16 x i32>
%2 = icmp eq <16 x i32> %0, %1
%3 = bitcast i16 %__u to <16 x i1>
%4 = and <16 x i1> %2, %3
%5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
define zeroext i32 @test_masked_vpcmpeqd_v16i1_v32i1_mask_mem(i16 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqd_v16i1_v32i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqd (%rsi), %zmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqd_v16i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi272:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi273:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi274:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %r13
; NoVLX-NEXT: pushq %r12
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: .Lcfi275:
; NoVLX-NEXT: .cfi_offset %rbx, -56
; NoVLX-NEXT: .Lcfi276:
; NoVLX-NEXT: .cfi_offset %r12, -48
; NoVLX-NEXT: .Lcfi277:
; NoVLX-NEXT: .cfi_offset %r13, -40
; NoVLX-NEXT: .Lcfi278:
; NoVLX-NEXT: .cfi_offset %r14, -32
; NoVLX-NEXT: .Lcfi279:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpeqd (%rsi), %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: leaq -40(%rbp), %rsp
; NoVLX-NEXT: popq %rbx
; NoVLX-NEXT: popq %r12
; NoVLX-NEXT: popq %r13
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x i32>
%load = load <8 x i64>, <8 x i64>* %__b
%1 = bitcast <8 x i64> %load to <16 x i32>
%2 = icmp eq <16 x i32> %0, %1
%3 = bitcast i16 %__u to <16 x i1>
%4 = and <16 x i1> %2, %3
%5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
define zeroext i32 @test_vpcmpeqd_v16i1_v32i1_mask_mem_b(<8 x i64> %__a, i32* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqd_v16i1_v32i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpeqd (%rdi){1to16}, %zmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqd_v16i1_v32i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi280:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi281:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi282:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %r13
; NoVLX-NEXT: pushq %r12
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: .Lcfi283:
; NoVLX-NEXT: .cfi_offset %rbx, -56
; NoVLX-NEXT: .Lcfi284:
; NoVLX-NEXT: .cfi_offset %r12, -48
; NoVLX-NEXT: .Lcfi285:
; NoVLX-NEXT: .cfi_offset %r13, -40
; NoVLX-NEXT: .Lcfi286:
; NoVLX-NEXT: .cfi_offset %r14, -32
; NoVLX-NEXT: .Lcfi287:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpeqd (%rdi){1to16}, %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: leaq -40(%rbp), %rsp
; NoVLX-NEXT: popq %rbx
; NoVLX-NEXT: popq %r12
; NoVLX-NEXT: popq %r13
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x i32>
%load = load i32, i32* %__b
%vec = insertelement <16 x i32> undef, i32 %load, i32 0
%1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
%2 = icmp eq <16 x i32> %0, %1
%3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
define zeroext i32 @test_masked_vpcmpeqd_v16i1_v32i1_mask_mem_b(i16 zeroext %__u, <8 x i64> %__a, i32* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqd_v16i1_v32i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqd (%rsi){1to16}, %zmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqd_v16i1_v32i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi288:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi289:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi290:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %r13
; NoVLX-NEXT: pushq %r12
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: .Lcfi291:
; NoVLX-NEXT: .cfi_offset %rbx, -56
; NoVLX-NEXT: .Lcfi292:
; NoVLX-NEXT: .cfi_offset %r12, -48
; NoVLX-NEXT: .Lcfi293:
; NoVLX-NEXT: .cfi_offset %r13, -40
; NoVLX-NEXT: .Lcfi294:
; NoVLX-NEXT: .cfi_offset %r14, -32
; NoVLX-NEXT: .Lcfi295:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpeqd (%rsi){1to16}, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: leaq -40(%rbp), %rsp
; NoVLX-NEXT: popq %rbx
; NoVLX-NEXT: popq %r12
; NoVLX-NEXT: popq %r13
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x i32>
%load = load i32, i32* %__b
%vec = insertelement <16 x i32> undef, i32 %load, i32 0
%1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
%2 = icmp eq <16 x i32> %0, %1
%3 = bitcast i16 %__u to <16 x i1>
%4 = and <16 x i1> %3, %2
%5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
define zeroext i64 @test_vpcmpeqd_v16i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqd_v16i1_v64i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqd_v16i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi296:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi297:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi298:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %r13
; NoVLX-NEXT: pushq %r12
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: .Lcfi299:
; NoVLX-NEXT: .cfi_offset %rbx, -56
; NoVLX-NEXT: .Lcfi300:
; NoVLX-NEXT: .cfi_offset %r12, -48
; NoVLX-NEXT: .Lcfi301:
; NoVLX-NEXT: .cfi_offset %r13, -40
; NoVLX-NEXT: .Lcfi302:
; NoVLX-NEXT: .cfi_offset %r14, -32
; NoVLX-NEXT: .Lcfi303:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: leaq -40(%rbp), %rsp
; NoVLX-NEXT: popq %rbx
; NoVLX-NEXT: popq %r12
; NoVLX-NEXT: popq %r13
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x i32>
%1 = bitcast <8 x i64> %__b to <16 x i32>
%2 = icmp eq <16 x i32> %0, %1
%3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
define zeroext i64 @test_vpcmpeqd_v16i1_v64i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqd_v16i1_v64i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpeqd (%rdi), %zmm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqd_v16i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi304:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi305:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi306:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %r13
; NoVLX-NEXT: pushq %r12
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: .Lcfi307:
; NoVLX-NEXT: .cfi_offset %rbx, -56
; NoVLX-NEXT: .Lcfi308:
; NoVLX-NEXT: .cfi_offset %r12, -48
; NoVLX-NEXT: .Lcfi309:
; NoVLX-NEXT: .cfi_offset %r13, -40
; NoVLX-NEXT: .Lcfi310:
; NoVLX-NEXT: .cfi_offset %r14, -32
; NoVLX-NEXT: .Lcfi311:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpeqd (%rdi), %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: leaq -40(%rbp), %rsp
; NoVLX-NEXT: popq %rbx
; NoVLX-NEXT: popq %r12
; NoVLX-NEXT: popq %r13
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x i32>
%load = load <8 x i64>, <8 x i64>* %__b
%1 = bitcast <8 x i64> %load to <16 x i32>
%2 = icmp eq <16 x i32> %0, %1
%3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
define zeroext i64 @test_masked_vpcmpeqd_v16i1_v64i1_mask(i16 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqd_v16i1_v64i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqd_v16i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi312:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi313:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi314:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %r13
; NoVLX-NEXT: pushq %r12
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: .Lcfi315:
; NoVLX-NEXT: .cfi_offset %rbx, -56
; NoVLX-NEXT: .Lcfi316:
; NoVLX-NEXT: .cfi_offset %r12, -48
; NoVLX-NEXT: .Lcfi317:
; NoVLX-NEXT: .cfi_offset %r13, -40
; NoVLX-NEXT: .Lcfi318:
; NoVLX-NEXT: .cfi_offset %r14, -32
; NoVLX-NEXT: .Lcfi319:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: leaq -40(%rbp), %rsp
; NoVLX-NEXT: popq %rbx
; NoVLX-NEXT: popq %r12
; NoVLX-NEXT: popq %r13
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x i32>
%1 = bitcast <8 x i64> %__b to <16 x i32>
%2 = icmp eq <16 x i32> %0, %1
%3 = bitcast i16 %__u to <16 x i1>
%4 = and <16 x i1> %2, %3
%5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
define zeroext i64 @test_masked_vpcmpeqd_v16i1_v64i1_mask_mem(i16 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqd_v16i1_v64i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqd (%rsi), %zmm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqd_v16i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi320:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi321:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi322:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %r13
; NoVLX-NEXT: pushq %r12
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: .Lcfi323:
; NoVLX-NEXT: .cfi_offset %rbx, -56
; NoVLX-NEXT: .Lcfi324:
; NoVLX-NEXT: .cfi_offset %r12, -48
; NoVLX-NEXT: .Lcfi325:
; NoVLX-NEXT: .cfi_offset %r13, -40
; NoVLX-NEXT: .Lcfi326:
; NoVLX-NEXT: .cfi_offset %r14, -32
; NoVLX-NEXT: .Lcfi327:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpeqd (%rsi), %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: leaq -40(%rbp), %rsp
; NoVLX-NEXT: popq %rbx
; NoVLX-NEXT: popq %r12
; NoVLX-NEXT: popq %r13
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x i32>
%load = load <8 x i64>, <8 x i64>* %__b
%1 = bitcast <8 x i64> %load to <16 x i32>
%2 = icmp eq <16 x i32> %0, %1
%3 = bitcast i16 %__u to <16 x i1>
%4 = and <16 x i1> %2, %3
%5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
define zeroext i64 @test_vpcmpeqd_v16i1_v64i1_mask_mem_b(<8 x i64> %__a, i32* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqd_v16i1_v64i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpeqd (%rdi){1to16}, %zmm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqd_v16i1_v64i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi328:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi329:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi330:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %r13
; NoVLX-NEXT: pushq %r12
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: .Lcfi331:
; NoVLX-NEXT: .cfi_offset %rbx, -56
; NoVLX-NEXT: .Lcfi332:
; NoVLX-NEXT: .cfi_offset %r12, -48
; NoVLX-NEXT: .Lcfi333:
; NoVLX-NEXT: .cfi_offset %r13, -40
; NoVLX-NEXT: .Lcfi334:
; NoVLX-NEXT: .cfi_offset %r14, -32
; NoVLX-NEXT: .Lcfi335:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpeqd (%rdi){1to16}, %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: leaq -40(%rbp), %rsp
; NoVLX-NEXT: popq %rbx
; NoVLX-NEXT: popq %r12
; NoVLX-NEXT: popq %r13
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x i32>
%load = load i32, i32* %__b
%vec = insertelement <16 x i32> undef, i32 %load, i32 0
%1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
%2 = icmp eq <16 x i32> %0, %1
%3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
define zeroext i64 @test_masked_vpcmpeqd_v16i1_v64i1_mask_mem_b(i16 zeroext %__u, <8 x i64> %__a, i32* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqd_v16i1_v64i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqd (%rsi){1to16}, %zmm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqd_v16i1_v64i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi336:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi337:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi338:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %r13
; NoVLX-NEXT: pushq %r12
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: .Lcfi339:
; NoVLX-NEXT: .cfi_offset %rbx, -56
; NoVLX-NEXT: .Lcfi340:
; NoVLX-NEXT: .cfi_offset %r12, -48
; NoVLX-NEXT: .Lcfi341:
; NoVLX-NEXT: .cfi_offset %r13, -40
; NoVLX-NEXT: .Lcfi342:
; NoVLX-NEXT: .cfi_offset %r14, -32
; NoVLX-NEXT: .Lcfi343:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpeqd (%rsi){1to16}, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: leaq -40(%rbp), %rsp
; NoVLX-NEXT: popq %rbx
; NoVLX-NEXT: popq %r12
; NoVLX-NEXT: popq %r13
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x i32>
%load = load i32, i32* %__b
%vec = insertelement <16 x i32> undef, i32 %load, i32 0
%1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
%2 = icmp eq <16 x i32> %0, %1
%3 = bitcast i16 %__u to <16 x i1>
%4 = and <16 x i1> %3, %2
%5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
define zeroext i4 @test_vpcmpeqq_v2i1_v4i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqq_v2i1_v4i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpeqq %xmm1, %xmm0, %k0
; VLX-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp)
; VLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqq_v2i1_v4i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%1 = bitcast <2 x i64> %__b to <2 x i64>
%2 = icmp eq <2 x i64> %0, %1
%3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%4 = bitcast <4 x i1> %3 to i4
ret i4 %4
}
define zeroext i4 @test_vpcmpeqq_v2i1_v4i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqq_v2i1_v4i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpeqq (%rdi), %xmm0, %k0
; VLX-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp)
; VLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqq_v2i1_v4i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpcmpeqq (%rdi), %xmm0, %xmm0
; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <2 x i64>
%2 = icmp eq <2 x i64> %0, %1
%3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%4 = bitcast <4 x i1> %3 to i4
ret i4 %4
}
define zeroext i4 @test_masked_vpcmpeqq_v2i1_v4i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqq_v2i1_v4i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqq %xmm1, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp)
; VLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v4i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%1 = bitcast <2 x i64> %__b to <2 x i64>
%2 = icmp eq <2 x i64> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
%4 = and <2 x i1> %2, %extract.i
%5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%6 = bitcast <4 x i1> %5 to i4
ret i4 %6
}
define zeroext i4 @test_masked_vpcmpeqq_v2i1_v4i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqq_v2i1_v4i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqq (%rsi), %xmm0, %k0 {%k1}
; VLX-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp)
; VLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v4i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpcmpeqq (%rsi), %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <2 x i64>
%2 = icmp eq <2 x i64> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
%4 = and <2 x i1> %2, %extract.i
%5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%6 = bitcast <4 x i1> %5 to i4
ret i4 %6
}
define zeroext i4 @test_vpcmpeqq_v2i1_v4i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqq_v2i1_v4i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpeqq (%rdi){1to2}, %xmm0, %k0
; VLX-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp)
; VLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqq_v2i1_v4i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1
; NoVLX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%load = load i64, i64* %__b
%vec = insertelement <2 x i64> undef, i64 %load, i32 0
%1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
%2 = icmp eq <2 x i64> %0, %1
%3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%4 = bitcast <4 x i1> %3 to i4
ret i4 %4
}
define zeroext i4 @test_masked_vpcmpeqq_v2i1_v4i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqq_v2i1_v4i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqq (%rsi){1to2}, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp)
; VLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v4i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1
; NoVLX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%load = load i64, i64* %__b
%vec = insertelement <2 x i64> undef, i64 %load, i32 0
%1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
%2 = icmp eq <2 x i64> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
%4 = and <2 x i1> %extract.i, %2
%5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%6 = bitcast <4 x i1> %5 to i4
ret i4 %6
}
define zeroext i8 @test_vpcmpeqq_v2i1_v8i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqq_v2i1_v8i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpeqq %xmm1, %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqq_v2i1_v8i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,8,2,3,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%1 = bitcast <2 x i64> %__b to <2 x i64>
%2 = icmp eq <2 x i64> %0, %1
%3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%4 = bitcast <8 x i1> %3 to i8
ret i8 %4
}
define zeroext i8 @test_vpcmpeqq_v2i1_v8i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqq_v2i1_v8i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpeqq (%rdi), %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqq_v2i1_v8i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpcmpeqq (%rdi), %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,8,2,3,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <2 x i64>
%2 = icmp eq <2 x i64> %0, %1
%3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%4 = bitcast <8 x i1> %3 to i8
ret i8 %4
}
define zeroext i8 @test_masked_vpcmpeqq_v2i1_v8i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqq_v2i1_v8i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqq %xmm1, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v8i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,8,2,3,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%1 = bitcast <2 x i64> %__b to <2 x i64>
%2 = icmp eq <2 x i64> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
%4 = and <2 x i1> %2, %extract.i
%5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%6 = bitcast <8 x i1> %5 to i8
ret i8 %6
}
define zeroext i8 @test_masked_vpcmpeqq_v2i1_v8i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqq_v2i1_v8i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqq (%rsi), %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v8i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpcmpeqq (%rsi), %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,8,2,3,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <2 x i64>
%2 = icmp eq <2 x i64> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
%4 = and <2 x i1> %2, %extract.i
%5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%6 = bitcast <8 x i1> %5 to i8
ret i8 %6
}
define zeroext i8 @test_vpcmpeqq_v2i1_v8i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqq_v2i1_v8i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpeqq (%rdi){1to2}, %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqq_v2i1_v8i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1
; NoVLX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,8,2,3,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%load = load i64, i64* %__b
%vec = insertelement <2 x i64> undef, i64 %load, i32 0
%1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
%2 = icmp eq <2 x i64> %0, %1
%3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%4 = bitcast <8 x i1> %3 to i8
ret i8 %4
}
define zeroext i8 @test_masked_vpcmpeqq_v2i1_v8i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqq_v2i1_v8i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqq (%rsi){1to2}, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v8i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1
; NoVLX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,8,2,3,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%load = load i64, i64* %__b
%vec = insertelement <2 x i64> undef, i64 %load, i32 0
%1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
%2 = icmp eq <2 x i64> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
%4 = and <2 x i1> %extract.i, %2
%5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%6 = bitcast <8 x i1> %5 to i8
ret i8 %6
}
define zeroext i16 @test_vpcmpeqq_v2i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqq_v2i1_v16i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpeqq %xmm1, %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqq_v2i1_v16i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm1, %zmm0, %zmm2
; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%1 = bitcast <2 x i64> %__b to <2 x i64>
%2 = icmp eq <2 x i64> %0, %1
%3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%4 = bitcast <16 x i1> %3 to i16
ret i16 %4
}
define zeroext i16 @test_vpcmpeqq_v2i1_v16i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqq_v2i1_v16i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpeqq (%rdi), %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqq_v2i1_v16i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpcmpeqq (%rdi), %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm1, %zmm0, %zmm2
; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <2 x i64>
%2 = icmp eq <2 x i64> %0, %1
%3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%4 = bitcast <16 x i1> %3 to i16
ret i16 %4
}
define zeroext i16 @test_masked_vpcmpeqq_v2i1_v16i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqq_v2i1_v16i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqq %xmm1, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v16i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm1, %zmm0, %zmm2
; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%1 = bitcast <2 x i64> %__b to <2 x i64>
%2 = icmp eq <2 x i64> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
%4 = and <2 x i1> %2, %extract.i
%5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%6 = bitcast <16 x i1> %5 to i16
ret i16 %6
}
define zeroext i16 @test_masked_vpcmpeqq_v2i1_v16i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqq_v2i1_v16i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqq (%rsi), %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v16i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpcmpeqq (%rsi), %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm1, %zmm0, %zmm2
; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <2 x i64>
%2 = icmp eq <2 x i64> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
%4 = and <2 x i1> %2, %extract.i
%5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%6 = bitcast <16 x i1> %5 to i16
ret i16 %6
}
define zeroext i16 @test_vpcmpeqq_v2i1_v16i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqq_v2i1_v16i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpeqq (%rdi){1to2}, %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqq_v2i1_v16i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1
; NoVLX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm1, %zmm0, %zmm2
; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%load = load i64, i64* %__b
%vec = insertelement <2 x i64> undef, i64 %load, i32 0
%1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
%2 = icmp eq <2 x i64> %0, %1
%3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%4 = bitcast <16 x i1> %3 to i16
ret i16 %4
}
define zeroext i16 @test_masked_vpcmpeqq_v2i1_v16i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqq_v2i1_v16i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqq (%rsi){1to2}, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v16i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1
; NoVLX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm1, %zmm0, %zmm2
; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%load = load i64, i64* %__b
%vec = insertelement <2 x i64> undef, i64 %load, i32 0
%1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
%2 = icmp eq <2 x i64> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
%4 = and <2 x i1> %extract.i, %2
%5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%6 = bitcast <16 x i1> %5 to i16
ret i16 %6
}
define zeroext i32 @test_vpcmpeqq_v2i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqq_v2i1_v32i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpeqq %xmm1, %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqq_v2i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi344:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi345:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi346:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%1 = bitcast <2 x i64> %__b to <2 x i64>
%2 = icmp eq <2 x i64> %0, %1
%3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
define zeroext i32 @test_vpcmpeqq_v2i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqq_v2i1_v32i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpeqq (%rdi), %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqq_v2i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi347:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi348:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi349:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpcmpeqq (%rdi), %xmm0, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <2 x i64>
%2 = icmp eq <2 x i64> %0, %1
%3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
define zeroext i32 @test_masked_vpcmpeqq_v2i1_v32i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqq_v2i1_v32i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqq %xmm1, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi350:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi351:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi352:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%1 = bitcast <2 x i64> %__b to <2 x i64>
%2 = icmp eq <2 x i64> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
%4 = and <2 x i1> %2, %extract.i
%5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
define zeroext i32 @test_masked_vpcmpeqq_v2i1_v32i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqq_v2i1_v32i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqq (%rsi), %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi353:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi354:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi355:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpcmpeqq (%rsi), %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <2 x i64>
%2 = icmp eq <2 x i64> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
%4 = and <2 x i1> %2, %extract.i
%5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
define zeroext i32 @test_vpcmpeqq_v2i1_v32i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqq_v2i1_v32i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpeqq (%rdi){1to2}, %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqq_v2i1_v32i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi356:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi357:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi358:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1
; NoVLX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%load = load i64, i64* %__b
%vec = insertelement <2 x i64> undef, i64 %load, i32 0
%1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
%2 = icmp eq <2 x i64> %0, %1
%3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
define zeroext i32 @test_masked_vpcmpeqq_v2i1_v32i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqq_v2i1_v32i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqq (%rsi){1to2}, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v32i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi359:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi360:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi361:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1
; NoVLX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%load = load i64, i64* %__b
%vec = insertelement <2 x i64> undef, i64 %load, i32 0
%1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
%2 = icmp eq <2 x i64> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
%4 = and <2 x i1> %extract.i, %2
%5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
define zeroext i64 @test_vpcmpeqq_v2i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqq_v2i1_v64i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpeqq %xmm1, %xmm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqq_v2i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi362:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi363:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi364:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%1 = bitcast <2 x i64> %__b to <2 x i64>
%2 = icmp eq <2 x i64> %0, %1
%3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
define zeroext i64 @test_vpcmpeqq_v2i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqq_v2i1_v64i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpeqq (%rdi), %xmm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqq_v2i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi365:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi366:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi367:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpeqq (%rdi), %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <2 x i64>
%2 = icmp eq <2 x i64> %0, %1
%3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
define zeroext i64 @test_masked_vpcmpeqq_v2i1_v64i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqq_v2i1_v64i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqq %xmm1, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi368:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi369:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi370:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%1 = bitcast <2 x i64> %__b to <2 x i64>
%2 = icmp eq <2 x i64> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
%4 = and <2 x i1> %2, %extract.i
%5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
define zeroext i64 @test_masked_vpcmpeqq_v2i1_v64i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqq_v2i1_v64i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqq (%rsi), %xmm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi371:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi372:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi373:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpeqq (%rsi), %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <2 x i64>
%2 = icmp eq <2 x i64> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
%4 = and <2 x i1> %2, %extract.i
%5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
define zeroext i64 @test_vpcmpeqq_v2i1_v64i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqq_v2i1_v64i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpeqq (%rdi){1to2}, %xmm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqq_v2i1_v64i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi374:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi375:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi376:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1
; NoVLX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%load = load i64, i64* %__b
%vec = insertelement <2 x i64> undef, i64 %load, i32 0
%1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
%2 = icmp eq <2 x i64> %0, %1
%3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
define zeroext i64 @test_masked_vpcmpeqq_v2i1_v64i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqq_v2i1_v64i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqq (%rsi){1to2}, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v64i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi377:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi378:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi379:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1
; NoVLX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%load = load i64, i64* %__b
%vec = insertelement <2 x i64> undef, i64 %load, i32 0
%1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
%2 = icmp eq <2 x i64> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
%4 = and <2 x i1> %extract.i, %2
%5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
define zeroext i8 @test_vpcmpeqq_v4i1_v8i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqq_v4i1_v8i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpeqq %ymm1, %ymm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqq_v4i1_v8i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
%1 = bitcast <4 x i64> %__b to <4 x i64>
%2 = icmp eq <4 x i64> %0, %1
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <8 x i1> %3 to i8
ret i8 %4
}
define zeroext i8 @test_vpcmpeqq_v4i1_v8i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqq_v4i1_v8i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpeqq (%rdi), %ymm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqq_v4i1_v8i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpcmpeqq (%rdi), %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
%load = load <4 x i64>, <4 x i64>* %__b
%1 = bitcast <4 x i64> %load to <4 x i64>
%2 = icmp eq <4 x i64> %0, %1
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <8 x i1> %3 to i8
ret i8 %4
}
define zeroext i8 @test_masked_vpcmpeqq_v4i1_v8i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqq_v4i1_v8i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqq %ymm1, %ymm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v8i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
; NoVLX-NEXT: kshiftrw $15, %k3, %k3
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k3, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
%1 = bitcast <4 x i64> %__b to <4 x i64>
%2 = icmp eq <4 x i64> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%4 = and <4 x i1> %2, %extract.i
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <8 x i1> %5 to i8
ret i8 %6
}
define zeroext i8 @test_masked_vpcmpeqq_v4i1_v8i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqq_v4i1_v8i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqq (%rsi), %ymm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v8i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpcmpeqq (%rsi), %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
; NoVLX-NEXT: kshiftrw $15, %k3, %k3
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k3, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
%load = load <4 x i64>, <4 x i64>* %__b
%1 = bitcast <4 x i64> %load to <4 x i64>
%2 = icmp eq <4 x i64> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%4 = and <4 x i1> %2, %extract.i
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <8 x i1> %5 to i8
ret i8 %6
}
define zeroext i8 @test_vpcmpeqq_v4i1_v8i1_mask_mem_b(<4 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqq_v4i1_v8i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpeqq (%rdi){1to4}, %ymm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqq_v4i1_v8i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1
; NoVLX-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
%load = load i64, i64* %__b
%vec = insertelement <4 x i64> undef, i64 %load, i32 0
%1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
%2 = icmp eq <4 x i64> %0, %1
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <8 x i1> %3 to i8
ret i8 %4
}
define zeroext i8 @test_masked_vpcmpeqq_v4i1_v8i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqq_v4i1_v8i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqq (%rsi){1to4}, %ymm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v8i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1
; NoVLX-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
; NoVLX-NEXT: kshiftrw $15, %k3, %k3
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k3, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
%load = load i64, i64* %__b
%vec = insertelement <4 x i64> undef, i64 %load, i32 0
%1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
%2 = icmp eq <4 x i64> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%4 = and <4 x i1> %extract.i, %2
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <8 x i1> %5 to i8
ret i8 %6
}
define zeroext i16 @test_vpcmpeqq_v4i1_v16i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqq_v4i1_v16i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpeqq %ymm1, %ymm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqq_v4i1_v16i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
%1 = bitcast <4 x i64> %__b to <4 x i64>
%2 = icmp eq <4 x i64> %0, %1
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <16 x i1> %3 to i16
ret i16 %4
}
define zeroext i16 @test_vpcmpeqq_v4i1_v16i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqq_v4i1_v16i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpeqq (%rdi), %ymm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqq_v4i1_v16i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpcmpeqq (%rdi), %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
%load = load <4 x i64>, <4 x i64>* %__b
%1 = bitcast <4 x i64> %load to <4 x i64>
%2 = icmp eq <4 x i64> %0, %1
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <16 x i1> %3 to i16
ret i16 %4
}
define zeroext i16 @test_masked_vpcmpeqq_v4i1_v16i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqq_v4i1_v16i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqq %ymm1, %ymm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v16i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
; NoVLX-NEXT: kshiftrw $15, %k3, %k3
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k3, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
%1 = bitcast <4 x i64> %__b to <4 x i64>
%2 = icmp eq <4 x i64> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%4 = and <4 x i1> %2, %extract.i
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <16 x i1> %5 to i16
ret i16 %6
}
define zeroext i16 @test_masked_vpcmpeqq_v4i1_v16i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqq_v4i1_v16i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqq (%rsi), %ymm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v16i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpcmpeqq (%rsi), %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
; NoVLX-NEXT: kshiftrw $15, %k3, %k3
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k3, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
%load = load <4 x i64>, <4 x i64>* %__b
%1 = bitcast <4 x i64> %load to <4 x i64>
%2 = icmp eq <4 x i64> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%4 = and <4 x i1> %2, %extract.i
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <16 x i1> %5 to i16
ret i16 %6
}
define zeroext i16 @test_vpcmpeqq_v4i1_v16i1_mask_mem_b(<4 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqq_v4i1_v16i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpeqq (%rdi){1to4}, %ymm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqq_v4i1_v16i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1
; NoVLX-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
%load = load i64, i64* %__b
%vec = insertelement <4 x i64> undef, i64 %load, i32 0
%1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
%2 = icmp eq <4 x i64> %0, %1
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <16 x i1> %3 to i16
ret i16 %4
}
define zeroext i16 @test_masked_vpcmpeqq_v4i1_v16i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqq_v4i1_v16i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqq (%rsi){1to4}, %ymm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v16i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1
; NoVLX-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
; NoVLX-NEXT: kshiftrw $15, %k3, %k3
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k3, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
%load = load i64, i64* %__b
%vec = insertelement <4 x i64> undef, i64 %load, i32 0
%1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
%2 = icmp eq <4 x i64> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%4 = and <4 x i1> %extract.i, %2
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <16 x i1> %5 to i16
ret i16 %6
}
define zeroext i32 @test_vpcmpeqq_v4i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqq_v4i1_v32i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpeqq %ymm1, %ymm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqq_v4i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi380:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi381:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi382:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
%1 = bitcast <4 x i64> %__b to <4 x i64>
%2 = icmp eq <4 x i64> %0, %1
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
define zeroext i32 @test_vpcmpeqq_v4i1_v32i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqq_v4i1_v32i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpeqq (%rdi), %ymm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqq_v4i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi383:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi384:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi385:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpcmpeqq (%rdi), %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
%load = load <4 x i64>, <4 x i64>* %__b
%1 = bitcast <4 x i64> %load to <4 x i64>
%2 = icmp eq <4 x i64> %0, %1
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
define zeroext i32 @test_masked_vpcmpeqq_v4i1_v32i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqq_v4i1_v32i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqq %ymm1, %ymm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi386:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi387:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi388:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
; NoVLX-NEXT: kshiftrw $15, %k3, %k3
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k3, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
%1 = bitcast <4 x i64> %__b to <4 x i64>
%2 = icmp eq <4 x i64> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%4 = and <4 x i1> %2, %extract.i
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
define zeroext i32 @test_masked_vpcmpeqq_v4i1_v32i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqq_v4i1_v32i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqq (%rsi), %ymm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi389:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi390:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi391:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpcmpeqq (%rsi), %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
; NoVLX-NEXT: kshiftrw $15, %k3, %k3
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k3, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
%load = load <4 x i64>, <4 x i64>* %__b
%1 = bitcast <4 x i64> %load to <4 x i64>
%2 = icmp eq <4 x i64> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%4 = and <4 x i1> %2, %extract.i
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
define zeroext i32 @test_vpcmpeqq_v4i1_v32i1_mask_mem_b(<4 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqq_v4i1_v32i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpeqq (%rdi){1to4}, %ymm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqq_v4i1_v32i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi392:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi393:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi394:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1
; NoVLX-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
%load = load i64, i64* %__b
%vec = insertelement <4 x i64> undef, i64 %load, i32 0
%1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
%2 = icmp eq <4 x i64> %0, %1
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
define zeroext i32 @test_masked_vpcmpeqq_v4i1_v32i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqq_v4i1_v32i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqq (%rsi){1to4}, %ymm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v32i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi395:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi396:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi397:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1
; NoVLX-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
; NoVLX-NEXT: kshiftrw $15, %k3, %k3
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k3, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
%load = load i64, i64* %__b
%vec = insertelement <4 x i64> undef, i64 %load, i32 0
%1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
%2 = icmp eq <4 x i64> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%4 = and <4 x i1> %extract.i, %2
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
define zeroext i64 @test_vpcmpeqq_v4i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqq_v4i1_v64i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpeqq %ymm1, %ymm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqq_v4i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi398:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi399:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi400:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
%1 = bitcast <4 x i64> %__b to <4 x i64>
%2 = icmp eq <4 x i64> %0, %1
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
define zeroext i64 @test_vpcmpeqq_v4i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqq_v4i1_v64i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpeqq (%rdi), %ymm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqq_v4i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi401:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi402:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi403:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpeqq (%rdi), %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
%load = load <4 x i64>, <4 x i64>* %__b
%1 = bitcast <4 x i64> %load to <4 x i64>
%2 = icmp eq <4 x i64> %0, %1
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
define zeroext i64 @test_masked_vpcmpeqq_v4i1_v64i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqq_v4i1_v64i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqq %ymm1, %ymm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi404:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi405:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi406:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
; NoVLX-NEXT: kshiftrw $15, %k3, %k3
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k3, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
%1 = bitcast <4 x i64> %__b to <4 x i64>
%2 = icmp eq <4 x i64> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%4 = and <4 x i1> %2, %extract.i
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
define zeroext i64 @test_masked_vpcmpeqq_v4i1_v64i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqq_v4i1_v64i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqq (%rsi), %ymm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi407:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi408:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi409:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpeqq (%rsi), %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
; NoVLX-NEXT: kshiftrw $15, %k3, %k3
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k3, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
%load = load <4 x i64>, <4 x i64>* %__b
%1 = bitcast <4 x i64> %load to <4 x i64>
%2 = icmp eq <4 x i64> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%4 = and <4 x i1> %2, %extract.i
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
define zeroext i64 @test_vpcmpeqq_v4i1_v64i1_mask_mem_b(<4 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqq_v4i1_v64i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpeqq (%rdi){1to4}, %ymm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqq_v4i1_v64i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi410:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi411:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi412:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1
; NoVLX-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
%load = load i64, i64* %__b
%vec = insertelement <4 x i64> undef, i64 %load, i32 0
%1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
%2 = icmp eq <4 x i64> %0, %1
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
define zeroext i64 @test_masked_vpcmpeqq_v4i1_v64i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqq_v4i1_v64i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqq (%rsi){1to4}, %ymm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v64i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi413:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi414:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi415:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1
; NoVLX-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
; NoVLX-NEXT: kshiftrw $15, %k3, %k3
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k3, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
%load = load i64, i64* %__b
%vec = insertelement <4 x i64> undef, i64 %load, i32 0
%1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
%2 = icmp eq <4 x i64> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%4 = and <4 x i1> %extract.i, %2
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
define zeroext i16 @test_vpcmpeqq_v8i1_v16i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqq_v8i1_v16i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqq_v8i1_v16i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
%1 = bitcast <8 x i64> %__b to <8 x i64>
%2 = icmp eq <8 x i64> %0, %1
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%4 = bitcast <16 x i1> %3 to i16
ret i16 %4
}
define zeroext i16 @test_vpcmpeqq_v8i1_v16i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqq_v8i1_v16i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpeqq (%rdi), %zmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqq_v8i1_v16i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpcmpeqq (%rdi), %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
%load = load <8 x i64>, <8 x i64>* %__b
%1 = bitcast <8 x i64> %load to <8 x i64>
%2 = icmp eq <8 x i64> %0, %1
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%4 = bitcast <16 x i1> %3 to i16
ret i16 %4
}
define zeroext i16 @test_masked_vpcmpeqq_v8i1_v16i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqq_v8i1_v16i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqq_v8i1_v16i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
%1 = bitcast <8 x i64> %__b to <8 x i64>
%2 = icmp eq <8 x i64> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %2, %3
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%6 = bitcast <16 x i1> %5 to i16
ret i16 %6
}
define zeroext i16 @test_masked_vpcmpeqq_v8i1_v16i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqq_v8i1_v16i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqq (%rsi), %zmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqq_v8i1_v16i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpeqq (%rsi), %zmm0, %k0 {%k1}
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
%load = load <8 x i64>, <8 x i64>* %__b
%1 = bitcast <8 x i64> %load to <8 x i64>
%2 = icmp eq <8 x i64> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %2, %3
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%6 = bitcast <16 x i1> %5 to i16
ret i16 %6
}
define zeroext i16 @test_vpcmpeqq_v8i1_v16i1_mask_mem_b(<8 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqq_v8i1_v16i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpeqq (%rdi){1to8}, %zmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqq_v8i1_v16i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpcmpeqq (%rdi){1to8}, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
%load = load i64, i64* %__b
%vec = insertelement <8 x i64> undef, i64 %load, i32 0
%1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
%2 = icmp eq <8 x i64> %0, %1
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%4 = bitcast <16 x i1> %3 to i16
ret i16 %4
}
define zeroext i16 @test_masked_vpcmpeqq_v8i1_v16i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqq_v8i1_v16i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqq (%rsi){1to8}, %zmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqq_v8i1_v16i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpeqq (%rsi){1to8}, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
%load = load i64, i64* %__b
%vec = insertelement <8 x i64> undef, i64 %load, i32 0
%1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
%2 = icmp eq <8 x i64> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %3, %2
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%6 = bitcast <16 x i1> %5 to i16
ret i16 %6
}
define zeroext i32 @test_vpcmpeqq_v8i1_v32i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqq_v8i1_v32i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqq_v8i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi416:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi417:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi418:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
%1 = bitcast <8 x i64> %__b to <8 x i64>
%2 = icmp eq <8 x i64> %0, %1
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
define zeroext i32 @test_vpcmpeqq_v8i1_v32i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqq_v8i1_v32i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpeqq (%rdi), %zmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqq_v8i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi419:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi420:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi421:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpcmpeqq (%rdi), %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
%load = load <8 x i64>, <8 x i64>* %__b
%1 = bitcast <8 x i64> %load to <8 x i64>
%2 = icmp eq <8 x i64> %0, %1
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
define zeroext i32 @test_masked_vpcmpeqq_v8i1_v32i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqq_v8i1_v32i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqq_v8i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi422:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi423:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi424:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
%1 = bitcast <8 x i64> %__b to <8 x i64>
%2 = icmp eq <8 x i64> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %2, %3
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
define zeroext i32 @test_masked_vpcmpeqq_v8i1_v32i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqq_v8i1_v32i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqq (%rsi), %zmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqq_v8i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi425:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi426:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi427:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpeqq (%rsi), %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
%load = load <8 x i64>, <8 x i64>* %__b
%1 = bitcast <8 x i64> %load to <8 x i64>
%2 = icmp eq <8 x i64> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %2, %3
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
define zeroext i32 @test_vpcmpeqq_v8i1_v32i1_mask_mem_b(<8 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqq_v8i1_v32i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpeqq (%rdi){1to8}, %zmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqq_v8i1_v32i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi428:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi429:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi430:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpcmpeqq (%rdi){1to8}, %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
%load = load i64, i64* %__b
%vec = insertelement <8 x i64> undef, i64 %load, i32 0
%1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
%2 = icmp eq <8 x i64> %0, %1
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
define zeroext i32 @test_masked_vpcmpeqq_v8i1_v32i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqq_v8i1_v32i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqq (%rsi){1to8}, %zmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqq_v8i1_v32i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi431:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi432:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi433:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpeqq (%rsi){1to8}, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
%load = load i64, i64* %__b
%vec = insertelement <8 x i64> undef, i64 %load, i32 0
%1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
%2 = icmp eq <8 x i64> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %3, %2
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
define zeroext i64 @test_vpcmpeqq_v8i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqq_v8i1_v64i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqq_v8i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi434:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi435:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi436:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
%1 = bitcast <8 x i64> %__b to <8 x i64>
%2 = icmp eq <8 x i64> %0, %1
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
define zeroext i64 @test_vpcmpeqq_v8i1_v64i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqq_v8i1_v64i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpeqq (%rdi), %zmm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqq_v8i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi437:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi438:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi439:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpeqq (%rdi), %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
%load = load <8 x i64>, <8 x i64>* %__b
%1 = bitcast <8 x i64> %load to <8 x i64>
%2 = icmp eq <8 x i64> %0, %1
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
define zeroext i64 @test_masked_vpcmpeqq_v8i1_v64i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqq_v8i1_v64i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqq_v8i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi440:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi441:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi442:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
%1 = bitcast <8 x i64> %__b to <8 x i64>
%2 = icmp eq <8 x i64> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %2, %3
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
define zeroext i64 @test_masked_vpcmpeqq_v8i1_v64i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqq_v8i1_v64i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqq (%rsi), %zmm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqq_v8i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi443:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi444:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi445:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpeqq (%rsi), %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
%load = load <8 x i64>, <8 x i64>* %__b
%1 = bitcast <8 x i64> %load to <8 x i64>
%2 = icmp eq <8 x i64> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %2, %3
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
define zeroext i64 @test_vpcmpeqq_v8i1_v64i1_mask_mem_b(<8 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqq_v8i1_v64i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpeqq (%rdi){1to8}, %zmm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqq_v8i1_v64i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi446:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi447:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi448:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpeqq (%rdi){1to8}, %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
%load = load i64, i64* %__b
%vec = insertelement <8 x i64> undef, i64 %load, i32 0
%1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
%2 = icmp eq <8 x i64> %0, %1
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
define zeroext i64 @test_masked_vpcmpeqq_v8i1_v64i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqq_v8i1_v64i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqq (%rsi){1to8}, %zmm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqq_v8i1_v64i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi449:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi450:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi451:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpeqq (%rsi){1to8}, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
%load = load i64, i64* %__b
%vec = insertelement <8 x i64> undef, i64 %load, i32 0
%1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
%2 = icmp eq <8 x i64> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %3, %2
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
define zeroext i32 @test_vpcmpsgtb_v16i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgtb_v16i1_v32i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpgtb %xmm1, %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgtb_v16i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi452:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi453:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi454:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %r13
; NoVLX-NEXT: pushq %r12
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: .Lcfi455:
; NoVLX-NEXT: .cfi_offset %rbx, -56
; NoVLX-NEXT: .Lcfi456:
; NoVLX-NEXT: .cfi_offset %r12, -48
; NoVLX-NEXT: .Lcfi457:
; NoVLX-NEXT: .cfi_offset %r13, -40
; NoVLX-NEXT: .Lcfi458:
; NoVLX-NEXT: .cfi_offset %r14, -32
; NoVLX-NEXT: .Lcfi459:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: leaq -40(%rbp), %rsp
; NoVLX-NEXT: popq %rbx
; NoVLX-NEXT: popq %r12
; NoVLX-NEXT: popq %r13
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <16 x i8>
%1 = bitcast <2 x i64> %__b to <16 x i8>
%2 = icmp sgt <16 x i8> %0, %1
%3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
define zeroext i32 @test_vpcmpsgtb_v16i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgtb_v16i1_v32i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpgtb (%rdi), %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgtb_v16i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi460:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi461:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi462:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %r13
; NoVLX-NEXT: pushq %r12
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: .Lcfi463:
; NoVLX-NEXT: .cfi_offset %rbx, -56
; NoVLX-NEXT: .Lcfi464:
; NoVLX-NEXT: .cfi_offset %r12, -48
; NoVLX-NEXT: .Lcfi465:
; NoVLX-NEXT: .cfi_offset %r13, -40
; NoVLX-NEXT: .Lcfi466:
; NoVLX-NEXT: .cfi_offset %r14, -32
; NoVLX-NEXT: .Lcfi467:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpgtb (%rdi), %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: leaq -40(%rbp), %rsp
; NoVLX-NEXT: popq %rbx
; NoVLX-NEXT: popq %r12
; NoVLX-NEXT: popq %r13
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <16 x i8>
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <16 x i8>
%2 = icmp sgt <16 x i8> %0, %1
%3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
define zeroext i32 @test_masked_vpcmpsgtb_v16i1_v32i1_mask(i16 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgtb_v16i1_v32i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpgtb %xmm1, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgtb_v16i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi468:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi469:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi470:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %r13
; NoVLX-NEXT: pushq %r12
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: .Lcfi471:
; NoVLX-NEXT: .cfi_offset %rbx, -56
; NoVLX-NEXT: .Lcfi472:
; NoVLX-NEXT: .cfi_offset %r12, -48
; NoVLX-NEXT: .Lcfi473:
; NoVLX-NEXT: .cfi_offset %r13, -40
; NoVLX-NEXT: .Lcfi474:
; NoVLX-NEXT: .cfi_offset %r14, -32
; NoVLX-NEXT: .Lcfi475:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: leaq -40(%rbp), %rsp
; NoVLX-NEXT: popq %rbx
; NoVLX-NEXT: popq %r12
; NoVLX-NEXT: popq %r13
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <16 x i8>
%1 = bitcast <2 x i64> %__b to <16 x i8>
%2 = icmp sgt <16 x i8> %0, %1
%3 = bitcast i16 %__u to <16 x i1>
%4 = and <16 x i1> %2, %3
%5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
define zeroext i32 @test_masked_vpcmpsgtb_v16i1_v32i1_mask_mem(i16 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgtb_v16i1_v32i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpgtb (%rsi), %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgtb_v16i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi476:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi477:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi478:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %r13
; NoVLX-NEXT: pushq %r12
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: .Lcfi479:
; NoVLX-NEXT: .cfi_offset %rbx, -56
; NoVLX-NEXT: .Lcfi480:
; NoVLX-NEXT: .cfi_offset %r12, -48
; NoVLX-NEXT: .Lcfi481:
; NoVLX-NEXT: .cfi_offset %r13, -40
; NoVLX-NEXT: .Lcfi482:
; NoVLX-NEXT: .cfi_offset %r14, -32
; NoVLX-NEXT: .Lcfi483:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpgtb (%rsi), %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: leaq -40(%rbp), %rsp
; NoVLX-NEXT: popq %rbx
; NoVLX-NEXT: popq %r12
; NoVLX-NEXT: popq %r13
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <16 x i8>
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <16 x i8>
%2 = icmp sgt <16 x i8> %0, %1
%3 = bitcast i16 %__u to <16 x i1>
%4 = and <16 x i1> %2, %3
%5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
define zeroext i64 @test_vpcmpsgtb_v16i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgtb_v16i1_v64i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpgtb %xmm1, %xmm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgtb_v16i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi484:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi485:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi486:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %r13
; NoVLX-NEXT: pushq %r12
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: .Lcfi487:
; NoVLX-NEXT: .cfi_offset %rbx, -56
; NoVLX-NEXT: .Lcfi488:
; NoVLX-NEXT: .cfi_offset %r12, -48
; NoVLX-NEXT: .Lcfi489:
; NoVLX-NEXT: .cfi_offset %r13, -40
; NoVLX-NEXT: .Lcfi490:
; NoVLX-NEXT: .cfi_offset %r14, -32
; NoVLX-NEXT: .Lcfi491:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: leaq -40(%rbp), %rsp
; NoVLX-NEXT: popq %rbx
; NoVLX-NEXT: popq %r12
; NoVLX-NEXT: popq %r13
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <16 x i8>
%1 = bitcast <2 x i64> %__b to <16 x i8>
%2 = icmp sgt <16 x i8> %0, %1
%3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
define zeroext i64 @test_vpcmpsgtb_v16i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgtb_v16i1_v64i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpgtb (%rdi), %xmm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgtb_v16i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi492:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi493:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi494:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %r13
; NoVLX-NEXT: pushq %r12
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: .Lcfi495:
; NoVLX-NEXT: .cfi_offset %rbx, -56
; NoVLX-NEXT: .Lcfi496:
; NoVLX-NEXT: .cfi_offset %r12, -48
; NoVLX-NEXT: .Lcfi497:
; NoVLX-NEXT: .cfi_offset %r13, -40
; NoVLX-NEXT: .Lcfi498:
; NoVLX-NEXT: .cfi_offset %r14, -32
; NoVLX-NEXT: .Lcfi499:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpgtb (%rdi), %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: leaq -40(%rbp), %rsp
; NoVLX-NEXT: popq %rbx
; NoVLX-NEXT: popq %r12
; NoVLX-NEXT: popq %r13
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <16 x i8>
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <16 x i8>
%2 = icmp sgt <16 x i8> %0, %1
%3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
define zeroext i64 @test_masked_vpcmpsgtb_v16i1_v64i1_mask(i16 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgtb_v16i1_v64i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpgtb %xmm1, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgtb_v16i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi500:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi501:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi502:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %r13
; NoVLX-NEXT: pushq %r12
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: .Lcfi503:
; NoVLX-NEXT: .cfi_offset %rbx, -56
; NoVLX-NEXT: .Lcfi504:
; NoVLX-NEXT: .cfi_offset %r12, -48
; NoVLX-NEXT: .Lcfi505:
; NoVLX-NEXT: .cfi_offset %r13, -40
; NoVLX-NEXT: .Lcfi506:
; NoVLX-NEXT: .cfi_offset %r14, -32
; NoVLX-NEXT: .Lcfi507:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: leaq -40(%rbp), %rsp
; NoVLX-NEXT: popq %rbx
; NoVLX-NEXT: popq %r12
; NoVLX-NEXT: popq %r13
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <16 x i8>
%1 = bitcast <2 x i64> %__b to <16 x i8>
%2 = icmp sgt <16 x i8> %0, %1
%3 = bitcast i16 %__u to <16 x i1>
%4 = and <16 x i1> %2, %3
%5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
define zeroext i64 @test_masked_vpcmpsgtb_v16i1_v64i1_mask_mem(i16 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgtb_v16i1_v64i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpgtb (%rsi), %xmm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgtb_v16i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi508:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi509:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi510:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %r13
; NoVLX-NEXT: pushq %r12
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: .Lcfi511:
; NoVLX-NEXT: .cfi_offset %rbx, -56
; NoVLX-NEXT: .Lcfi512:
; NoVLX-NEXT: .cfi_offset %r12, -48
; NoVLX-NEXT: .Lcfi513:
; NoVLX-NEXT: .cfi_offset %r13, -40
; NoVLX-NEXT: .Lcfi514:
; NoVLX-NEXT: .cfi_offset %r14, -32
; NoVLX-NEXT: .Lcfi515:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpgtb (%rsi), %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: leaq -40(%rbp), %rsp
; NoVLX-NEXT: popq %rbx
; NoVLX-NEXT: popq %r12
; NoVLX-NEXT: popq %r13
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <16 x i8>
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <16 x i8>
%2 = icmp sgt <16 x i8> %0, %1
%3 = bitcast i16 %__u to <16 x i1>
%4 = and <16 x i1> %2, %3
%5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
define zeroext i64 @test_vpcmpsgtb_v32i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgtb_v32i1_v64i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpgtb %ymm1, %ymm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgtb_v32i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi516:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi517:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi518:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %ecx
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax
; NoVLX-NEXT: shlq $32, %rax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <32 x i8>
%1 = bitcast <4 x i64> %__b to <32 x i8>
%2 = icmp sgt <32 x i8> %0, %1
%3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
define zeroext i64 @test_vpcmpsgtb_v32i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgtb_v32i1_v64i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpgtb (%rdi), %ymm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgtb_v32i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi519:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi520:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi521:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpgtb (%rdi), %ymm0, %ymm0
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %ecx
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax
; NoVLX-NEXT: shlq $32, %rax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <32 x i8>
%load = load <4 x i64>, <4 x i64>* %__b
%1 = bitcast <4 x i64> %load to <32 x i8>
%2 = icmp sgt <32 x i8> %0, %1
%3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
define zeroext i64 @test_masked_vpcmpsgtb_v32i1_v64i1_mask(i32 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgtb_v32i1_v64i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpgtb %ymm1, %ymm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgtb_v32i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi522:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi523:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi524:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $96, %rsp
; NoVLX-NEXT: movl %edi, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k1
; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k2
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vpmovdb %zmm2, %xmm2
; NoVLX-NEXT: vpternlogd $255, %zmm3, %zmm3, %zmm3 {%k2} {z}
; NoVLX-NEXT: vpmovdb %zmm3, %xmm3
; NoVLX-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpand %xmm3, %xmm1, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpand %xmm2, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %ecx
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax
; NoVLX-NEXT: shlq $32, %rax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <32 x i8>
%1 = bitcast <4 x i64> %__b to <32 x i8>
%2 = icmp sgt <32 x i8> %0, %1
%3 = bitcast i32 %__u to <32 x i1>
%4 = and <32 x i1> %2, %3
%5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
define zeroext i64 @test_masked_vpcmpsgtb_v32i1_v64i1_mask_mem(i32 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgtb_v32i1_v64i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpgtb (%rsi), %ymm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgtb_v32i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi525:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi526:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi527:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $96, %rsp
; NoVLX-NEXT: movl %edi, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k1
; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k2
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpmovdb %zmm1, %xmm1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k2} {z}
; NoVLX-NEXT: vpmovdb %zmm2, %xmm2
; NoVLX-NEXT: vpcmpgtb (%rsi), %ymm0, %ymm0
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm3
; NoVLX-NEXT: vpand %xmm2, %xmm3, %xmm2
; NoVLX-NEXT: vpmovsxbd %xmm2, %zmm2
; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2
; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %ecx
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax
; NoVLX-NEXT: shlq $32, %rax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <32 x i8>
%load = load <4 x i64>, <4 x i64>* %__b
%1 = bitcast <4 x i64> %load to <32 x i8>
%2 = icmp sgt <32 x i8> %0, %1
%3 = bitcast i32 %__u to <32 x i1>
%4 = and <32 x i1> %2, %3
%5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
define zeroext i16 @test_vpcmpsgtw_v8i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgtw_v8i1_v16i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpgtw %xmm1, %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgtw_v8i1_v16i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kunpckbw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <8 x i16>
%1 = bitcast <2 x i64> %__b to <8 x i16>
%2 = icmp sgt <8 x i16> %0, %1
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%4 = bitcast <16 x i1> %3 to i16
ret i16 %4
}
define zeroext i16 @test_vpcmpsgtw_v8i1_v16i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgtw_v8i1_v16i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpgtw (%rdi), %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgtw_v8i1_v16i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpcmpgtw (%rdi), %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kunpckbw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <8 x i16>
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <8 x i16>
%2 = icmp sgt <8 x i16> %0, %1
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%4 = bitcast <16 x i1> %3 to i16
ret i16 %4
}
define zeroext i16 @test_masked_vpcmpsgtw_v8i1_v16i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgtw_v8i1_v16i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpgtw %xmm1, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgtw_v8i1_v16i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kunpckbw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <8 x i16>
%1 = bitcast <2 x i64> %__b to <8 x i16>
%2 = icmp sgt <8 x i16> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %2, %3
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%6 = bitcast <16 x i1> %5 to i16
ret i16 %6
}
define zeroext i16 @test_masked_vpcmpsgtw_v8i1_v16i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgtw_v8i1_v16i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpgtw (%rsi), %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgtw_v8i1_v16i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpcmpgtw (%rsi), %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kunpckbw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <8 x i16>
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <8 x i16>
%2 = icmp sgt <8 x i16> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %2, %3
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%6 = bitcast <16 x i1> %5 to i16
ret i16 %6
}
define zeroext i32 @test_vpcmpsgtw_v8i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgtw_v8i1_v32i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpgtw %xmm1, %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgtw_v8i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi528:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi529:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi530:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <8 x i16>
%1 = bitcast <2 x i64> %__b to <8 x i16>
%2 = icmp sgt <8 x i16> %0, %1
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
define zeroext i32 @test_vpcmpsgtw_v8i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgtw_v8i1_v32i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpgtw (%rdi), %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgtw_v8i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi531:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi532:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi533:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpcmpgtw (%rdi), %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <8 x i16>
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <8 x i16>
%2 = icmp sgt <8 x i16> %0, %1
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
define zeroext i32 @test_masked_vpcmpsgtw_v8i1_v32i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgtw_v8i1_v32i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpgtw %xmm1, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgtw_v8i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi534:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi535:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi536:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <8 x i16>
%1 = bitcast <2 x i64> %__b to <8 x i16>
%2 = icmp sgt <8 x i16> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %2, %3
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
define zeroext i32 @test_masked_vpcmpsgtw_v8i1_v32i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgtw_v8i1_v32i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpgtw (%rsi), %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgtw_v8i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi537:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi538:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi539:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpcmpgtw (%rsi), %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <8 x i16>
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <8 x i16>
%2 = icmp sgt <8 x i16> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %2, %3
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
define zeroext i64 @test_vpcmpsgtw_v8i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgtw_v8i1_v64i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpgtw %xmm1, %xmm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgtw_v8i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi540:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi541:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi542:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <8 x i16>
%1 = bitcast <2 x i64> %__b to <8 x i16>
%2 = icmp sgt <8 x i16> %0, %1
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
define zeroext i64 @test_vpcmpsgtw_v8i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgtw_v8i1_v64i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpgtw (%rdi), %xmm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgtw_v8i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi543:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi544:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi545:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpgtw (%rdi), %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <8 x i16>
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <8 x i16>
%2 = icmp sgt <8 x i16> %0, %1
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
define zeroext i64 @test_masked_vpcmpsgtw_v8i1_v64i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgtw_v8i1_v64i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpgtw %xmm1, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgtw_v8i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi546:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi547:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi548:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <8 x i16>
%1 = bitcast <2 x i64> %__b to <8 x i16>
%2 = icmp sgt <8 x i16> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %2, %3
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
define zeroext i64 @test_masked_vpcmpsgtw_v8i1_v64i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgtw_v8i1_v64i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpgtw (%rsi), %xmm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgtw_v8i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi549:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi550:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi551:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpgtw (%rsi), %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <8 x i16>
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <8 x i16>
%2 = icmp sgt <8 x i16> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %2, %3
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
define zeroext i32 @test_vpcmpsgtw_v16i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgtw_v16i1_v32i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpgtw %ymm1, %ymm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgtw_v16i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi552:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi553:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi554:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %r13
; NoVLX-NEXT: pushq %r12
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: .Lcfi555:
; NoVLX-NEXT: .cfi_offset %rbx, -56
; NoVLX-NEXT: .Lcfi556:
; NoVLX-NEXT: .cfi_offset %r12, -48
; NoVLX-NEXT: .Lcfi557:
; NoVLX-NEXT: .cfi_offset %r13, -40
; NoVLX-NEXT: .Lcfi558:
; NoVLX-NEXT: .cfi_offset %r14, -32
; NoVLX-NEXT: .Lcfi559:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: leaq -40(%rbp), %rsp
; NoVLX-NEXT: popq %rbx
; NoVLX-NEXT: popq %r12
; NoVLX-NEXT: popq %r13
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <16 x i16>
%1 = bitcast <4 x i64> %__b to <16 x i16>
%2 = icmp sgt <16 x i16> %0, %1
%3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
define zeroext i32 @test_vpcmpsgtw_v16i1_v32i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgtw_v16i1_v32i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpgtw (%rdi), %ymm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgtw_v16i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi560:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi561:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi562:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %r13
; NoVLX-NEXT: pushq %r12
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: .Lcfi563:
; NoVLX-NEXT: .cfi_offset %rbx, -56
; NoVLX-NEXT: .Lcfi564:
; NoVLX-NEXT: .cfi_offset %r12, -48
; NoVLX-NEXT: .Lcfi565:
; NoVLX-NEXT: .cfi_offset %r13, -40
; NoVLX-NEXT: .Lcfi566:
; NoVLX-NEXT: .cfi_offset %r14, -32
; NoVLX-NEXT: .Lcfi567:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpgtw (%rdi), %ymm0, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: leaq -40(%rbp), %rsp
; NoVLX-NEXT: popq %rbx
; NoVLX-NEXT: popq %r12
; NoVLX-NEXT: popq %r13
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <16 x i16>
%load = load <4 x i64>, <4 x i64>* %__b
%1 = bitcast <4 x i64> %load to <16 x i16>
%2 = icmp sgt <16 x i16> %0, %1
%3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
define zeroext i32 @test_masked_vpcmpsgtw_v16i1_v32i1_mask(i16 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgtw_v16i1_v32i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpgtw %ymm1, %ymm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgtw_v16i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi568:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi569:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi570:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %r13
; NoVLX-NEXT: pushq %r12
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: .Lcfi571:
; NoVLX-NEXT: .cfi_offset %rbx, -56
; NoVLX-NEXT: .Lcfi572:
; NoVLX-NEXT: .cfi_offset %r12, -48
; NoVLX-NEXT: .Lcfi573:
; NoVLX-NEXT: .cfi_offset %r13, -40
; NoVLX-NEXT: .Lcfi574:
; NoVLX-NEXT: .cfi_offset %r14, -32
; NoVLX-NEXT: .Lcfi575:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: leaq -40(%rbp), %rsp
; NoVLX-NEXT: popq %rbx
; NoVLX-NEXT: popq %r12
; NoVLX-NEXT: popq %r13
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <16 x i16>
%1 = bitcast <4 x i64> %__b to <16 x i16>
%2 = icmp sgt <16 x i16> %0, %1
%3 = bitcast i16 %__u to <16 x i1>
%4 = and <16 x i1> %2, %3
%5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
define zeroext i32 @test_masked_vpcmpsgtw_v16i1_v32i1_mask_mem(i16 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgtw_v16i1_v32i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpgtw (%rsi), %ymm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgtw_v16i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi576:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi577:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi578:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %r13
; NoVLX-NEXT: pushq %r12
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: .Lcfi579:
; NoVLX-NEXT: .cfi_offset %rbx, -56
; NoVLX-NEXT: .Lcfi580:
; NoVLX-NEXT: .cfi_offset %r12, -48
; NoVLX-NEXT: .Lcfi581:
; NoVLX-NEXT: .cfi_offset %r13, -40
; NoVLX-NEXT: .Lcfi582:
; NoVLX-NEXT: .cfi_offset %r14, -32
; NoVLX-NEXT: .Lcfi583:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpgtw (%rsi), %ymm0, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: leaq -40(%rbp), %rsp
; NoVLX-NEXT: popq %rbx
; NoVLX-NEXT: popq %r12
; NoVLX-NEXT: popq %r13
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <16 x i16>
%load = load <4 x i64>, <4 x i64>* %__b
%1 = bitcast <4 x i64> %load to <16 x i16>
%2 = icmp sgt <16 x i16> %0, %1
%3 = bitcast i16 %__u to <16 x i1>
%4 = and <16 x i1> %2, %3
%5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
define zeroext i64 @test_vpcmpsgtw_v16i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgtw_v16i1_v64i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpgtw %ymm1, %ymm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgtw_v16i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi584:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi585:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi586:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %r13
; NoVLX-NEXT: pushq %r12
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: .Lcfi587:
; NoVLX-NEXT: .cfi_offset %rbx, -56
; NoVLX-NEXT: .Lcfi588:
; NoVLX-NEXT: .cfi_offset %r12, -48
; NoVLX-NEXT: .Lcfi589:
; NoVLX-NEXT: .cfi_offset %r13, -40
; NoVLX-NEXT: .Lcfi590:
; NoVLX-NEXT: .cfi_offset %r14, -32
; NoVLX-NEXT: .Lcfi591:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: leaq -40(%rbp), %rsp
; NoVLX-NEXT: popq %rbx
; NoVLX-NEXT: popq %r12
; NoVLX-NEXT: popq %r13
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <16 x i16>
%1 = bitcast <4 x i64> %__b to <16 x i16>
%2 = icmp sgt <16 x i16> %0, %1
%3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
define zeroext i64 @test_vpcmpsgtw_v16i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgtw_v16i1_v64i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpgtw (%rdi), %ymm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgtw_v16i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi592:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi593:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi594:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %r13
; NoVLX-NEXT: pushq %r12
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: .Lcfi595:
; NoVLX-NEXT: .cfi_offset %rbx, -56
; NoVLX-NEXT: .Lcfi596:
; NoVLX-NEXT: .cfi_offset %r12, -48
; NoVLX-NEXT: .Lcfi597:
; NoVLX-NEXT: .cfi_offset %r13, -40
; NoVLX-NEXT: .Lcfi598:
; NoVLX-NEXT: .cfi_offset %r14, -32
; NoVLX-NEXT: .Lcfi599:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpgtw (%rdi), %ymm0, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: leaq -40(%rbp), %rsp
; NoVLX-NEXT: popq %rbx
; NoVLX-NEXT: popq %r12
; NoVLX-NEXT: popq %r13
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <16 x i16>
%load = load <4 x i64>, <4 x i64>* %__b
%1 = bitcast <4 x i64> %load to <16 x i16>
%2 = icmp sgt <16 x i16> %0, %1
%3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
define zeroext i64 @test_masked_vpcmpsgtw_v16i1_v64i1_mask(i16 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgtw_v16i1_v64i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpgtw %ymm1, %ymm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgtw_v16i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi600:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi601:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi602:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %r13
; NoVLX-NEXT: pushq %r12
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: .Lcfi603:
; NoVLX-NEXT: .cfi_offset %rbx, -56
; NoVLX-NEXT: .Lcfi604:
; NoVLX-NEXT: .cfi_offset %r12, -48
; NoVLX-NEXT: .Lcfi605:
; NoVLX-NEXT: .cfi_offset %r13, -40
; NoVLX-NEXT: .Lcfi606:
; NoVLX-NEXT: .cfi_offset %r14, -32
; NoVLX-NEXT: .Lcfi607:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: leaq -40(%rbp), %rsp
; NoVLX-NEXT: popq %rbx
; NoVLX-NEXT: popq %r12
; NoVLX-NEXT: popq %r13
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <16 x i16>
%1 = bitcast <4 x i64> %__b to <16 x i16>
%2 = icmp sgt <16 x i16> %0, %1
%3 = bitcast i16 %__u to <16 x i1>
%4 = and <16 x i1> %2, %3
%5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
define zeroext i64 @test_masked_vpcmpsgtw_v16i1_v64i1_mask_mem(i16 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgtw_v16i1_v64i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpgtw (%rsi), %ymm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgtw_v16i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi608:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi609:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi610:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %r13
; NoVLX-NEXT: pushq %r12
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: .Lcfi611:
; NoVLX-NEXT: .cfi_offset %rbx, -56
; NoVLX-NEXT: .Lcfi612:
; NoVLX-NEXT: .cfi_offset %r12, -48
; NoVLX-NEXT: .Lcfi613:
; NoVLX-NEXT: .cfi_offset %r13, -40
; NoVLX-NEXT: .Lcfi614:
; NoVLX-NEXT: .cfi_offset %r14, -32
; NoVLX-NEXT: .Lcfi615:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpgtw (%rsi), %ymm0, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: leaq -40(%rbp), %rsp
; NoVLX-NEXT: popq %rbx
; NoVLX-NEXT: popq %r12
; NoVLX-NEXT: popq %r13
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <16 x i16>
%load = load <4 x i64>, <4 x i64>* %__b
%1 = bitcast <4 x i64> %load to <16 x i16>
%2 = icmp sgt <16 x i16> %0, %1
%3 = bitcast i16 %__u to <16 x i1>
%4 = and <16 x i1> %2, %3
%5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
define zeroext i64 @test_vpcmpsgtw_v32i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgtw_v32i1_v64i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpgtw %zmm1, %zmm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgtw_v32i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi616:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi617:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi618:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm3
; NoVLX-NEXT: vmovq %xmm3, %rax
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: movq %rax, %rdx
; NoVLX-NEXT: vmovd %eax, %xmm2
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm5
; NoVLX-NEXT: vextracti32x4 $2, %zmm1, %xmm8
; NoVLX-NEXT: vextracti32x4 $3, %zmm1, %xmm4
; NoVLX-NEXT: vextracti128 $1, %ymm1, %xmm6
; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm7
; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm2
; NoVLX-NEXT: shrq $32, %rdx
; NoVLX-NEXT: vpinsrw $2, %edx, %xmm5, %xmm5
; NoVLX-NEXT: vpextrq $1, %xmm3, %rax
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm3
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: vmovq %xmm0, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm3
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vmovd %ecx, %xmm5
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm5, %xmm5
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm5, %xmm5
; NoVLX-NEXT: vpextrq $1, %xmm0, %rax
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm0
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: vmovq %xmm2, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vmovd %ecx, %xmm5
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm5, %xmm5
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm5, %xmm5
; NoVLX-NEXT: vpextrq $1, %xmm2, %rax
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm2
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: vmovq %xmm7, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm5
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vmovd %ecx, %xmm2
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
; NoVLX-NEXT: vpextrq $1, %xmm7, %rax
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: vmovq %xmm6, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm7
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vmovd %ecx, %xmm2
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
; NoVLX-NEXT: vpextrq $1, %xmm6, %rax
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: vmovq %xmm1, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm6
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vmovd %ecx, %xmm2
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
; NoVLX-NEXT: vpextrq $1, %xmm1, %rax
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm1
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1
; NoVLX-NEXT: vmovq %xmm4, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vmovd %ecx, %xmm2
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
; NoVLX-NEXT: vpextrq $1, %xmm4, %rax
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: vmovq %xmm8, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vmovd %ecx, %xmm4
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4
; NoVLX-NEXT: vpextrq $1, %xmm8, %rax
; NoVLX-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0
; NoVLX-NEXT: vinserti128 $1, %xmm5, %ymm7, %ymm3
; NoVLX-NEXT: vinserti128 $1, %xmm6, %ymm1, %ymm1
; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm1
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm3, %ymm1
; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $2, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $3, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $6, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $9, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $10, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $13, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $14, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm0
; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %ecx
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax
; NoVLX-NEXT: shlq $32, %rax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <32 x i16>
%1 = bitcast <8 x i64> %__b to <32 x i16>
%2 = icmp sgt <32 x i16> %0, %1
%3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
define zeroext i64 @test_vpcmpsgtw_v32i1_v64i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgtw_v32i1_v64i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpgtw (%rdi), %zmm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgtw_v32i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi619:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi620:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi621:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm2
; NoVLX-NEXT: vmovq %xmm2, %rax
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: movq %rax, %rdx
; NoVLX-NEXT: vmovd %eax, %xmm1
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm1, %xmm3
; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm1
; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm4
; NoVLX-NEXT: shrq $32, %rdx
; NoVLX-NEXT: vpinsrw $2, %edx, %xmm3, %xmm3
; NoVLX-NEXT: vpextrq $1, %xmm2, %rax
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm2
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: vmovq %xmm0, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vmovd %ecx, %xmm3
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
; NoVLX-NEXT: vpextrq $1, %xmm0, %rax
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm0
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: vmovq %xmm4, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vmovd %ecx, %xmm3
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
; NoVLX-NEXT: vpextrq $1, %xmm4, %rax
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: vmovq %xmm1, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm3
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vmovd %ecx, %xmm4
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4
; NoVLX-NEXT: vpextrq $1, %xmm1, %rax
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm1
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1
; NoVLX-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vinserti128 $1, %xmm3, %ymm1, %ymm1
; NoVLX-NEXT: vpcmpgtw 32(%rdi), %ymm1, %ymm1
; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %eax, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $2, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $3, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $6, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $9, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $10, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $13, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $14, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpcmpgtw (%rdi), %ymm0, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm0
; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %ecx
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax
; NoVLX-NEXT: shlq $32, %rax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <32 x i16>
%load = load <8 x i64>, <8 x i64>* %__b
%1 = bitcast <8 x i64> %load to <32 x i16>
%2 = icmp sgt <32 x i16> %0, %1
%3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
define zeroext i64 @test_masked_vpcmpsgtw_v32i1_v64i1_mask(i32 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgtw_v32i1_v64i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpgtw %zmm1, %zmm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgtw_v32i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi622:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi623:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi624:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $96, %rsp
; NoVLX-NEXT: movl %edi, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm2
; NoVLX-NEXT: vmovq %xmm2, %rax
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: movq %rax, %rdx
; NoVLX-NEXT: vmovd %eax, %xmm3
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm4
; NoVLX-NEXT: vextracti128 $1, %ymm1, %xmm8
; NoVLX-NEXT: vextracti32x4 $2, %zmm1, %xmm5
; NoVLX-NEXT: vextracti32x4 $3, %zmm1, %xmm7
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm6
; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm3
; NoVLX-NEXT: shrq $32, %rdx
; NoVLX-NEXT: vpinsrw $2, %edx, %xmm4, %xmm4
; NoVLX-NEXT: vpextrq $1, %xmm2, %rax
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm2
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: vmovq %xmm3, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm9
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vmovd %ecx, %xmm4
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4
; NoVLX-NEXT: vpextrq $1, %xmm3, %rax
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm3
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: vmovq %xmm6, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm4
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vmovd %ecx, %xmm3
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
; NoVLX-NEXT: vpextrq $1, %xmm6, %rax
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: vmovq %xmm0, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm6
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vmovd %ecx, %xmm3
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
; NoVLX-NEXT: vpextrq $1, %xmm0, %rax
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm0
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: vmovq %xmm7, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vmovd %ecx, %xmm3
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
; NoVLX-NEXT: vpextrq $1, %xmm7, %rax
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: vmovq %xmm5, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm7
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vmovd %ecx, %xmm3
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
; NoVLX-NEXT: vpextrq $1, %xmm5, %rax
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: vmovq %xmm8, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm3
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vmovd %ecx, %xmm5
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm5, %xmm5
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm5, %xmm5
; NoVLX-NEXT: vpextrq $1, %xmm8, %rax
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm5
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm5, %xmm5
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm5, %xmm5
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm5, %xmm5
; NoVLX-NEXT: vmovq %xmm1, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm5, %xmm5
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vmovd %ecx, %xmm2
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
; NoVLX-NEXT: vpextrq $1, %xmm1, %rax
; NoVLX-NEXT: vinserti128 $1, %xmm9, %ymm4, %ymm1
; NoVLX-NEXT: vinserti128 $1, %xmm6, %ymm0, %ymm4
; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k1
; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k2
; NoVLX-NEXT: vinserti128 $1, %xmm7, %ymm3, %ymm3
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpcmpgtw %ymm3, %ymm1, %ymm3
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k2} {z}
; NoVLX-NEXT: vpmovdb %zmm1, %xmm1
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2
; NoVLX-NEXT: vinserti128 $1, %xmm5, %ymm2, %ymm2
; NoVLX-NEXT: vpcmpgtw %ymm2, %ymm4, %ymm2
; NoVLX-NEXT: vpmovsxwd %ymm2, %zmm2
; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2
; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm2
; NoVLX-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm2, %xmm2
; NoVLX-NEXT: vpmovsxwd %ymm3, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm3
; NoVLX-NEXT: vptestmd %zmm3, %zmm3, %k0
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm3
; NoVLX-NEXT: vpinsrb $1, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $2, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $3, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $6, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $9, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $10, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $13, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $14, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm3, %xmm3
; NoVLX-NEXT: vpand %xmm1, %xmm3, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpand %xmm0, %xmm2, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %ecx
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax
; NoVLX-NEXT: shlq $32, %rax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <32 x i16>
%1 = bitcast <8 x i64> %__b to <32 x i16>
%2 = icmp sgt <32 x i16> %0, %1
%3 = bitcast i32 %__u to <32 x i1>
%4 = and <32 x i1> %2, %3
%5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
define zeroext i64 @test_masked_vpcmpsgtw_v32i1_v64i1_mask_mem(i32 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgtw_v32i1_v64i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpgtw (%rsi), %zmm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgtw_v32i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi625:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi626:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi627:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $96, %rsp
; NoVLX-NEXT: movl %edi, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm1
; NoVLX-NEXT: vmovq %xmm1, %rax
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: movq %rax, %rdx
; NoVLX-NEXT: vmovd %eax, %xmm2
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm3
; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm4
; NoVLX-NEXT: shrq $32, %rdx
; NoVLX-NEXT: vpinsrw $2, %edx, %xmm2, %xmm2
; NoVLX-NEXT: vpextrq $1, %xmm1, %rax
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm1
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1
; NoVLX-NEXT: vmovq %xmm4, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vmovd %ecx, %xmm2
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
; NoVLX-NEXT: vpextrq $1, %xmm4, %rax
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: vmovq %xmm3, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vmovd %ecx, %xmm4
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4
; NoVLX-NEXT: vpextrq $1, %xmm3, %rax
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm3
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: vmovq %xmm0, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm4
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vmovd %ecx, %xmm3
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
; NoVLX-NEXT: vpextrq $1, %xmm0, %rax
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm0
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm5
; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k1
; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k2
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm3
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k2} {z}
; NoVLX-NEXT: vpmovdb %zmm1, %xmm1
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm5, %xmm2
; NoVLX-NEXT: vinserti128 $1, %xmm4, %ymm2, %ymm2
; NoVLX-NEXT: vpcmpgtw (%rsi), %ymm2, %ymm2
; NoVLX-NEXT: vpmovsxwd %ymm2, %zmm2
; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2
; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %eax, %xmm2
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm2, %xmm2
; NoVLX-NEXT: vpcmpgtw 32(%rsi), %ymm3, %ymm3
; NoVLX-NEXT: vpmovsxwd %ymm3, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm3
; NoVLX-NEXT: vptestmd %zmm3, %zmm3, %k0
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm3
; NoVLX-NEXT: vpinsrb $1, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $2, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $3, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $6, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $9, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $10, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $13, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $14, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm3, %xmm3
; NoVLX-NEXT: vpand %xmm1, %xmm3, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpand %xmm0, %xmm2, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %ecx
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax
; NoVLX-NEXT: shlq $32, %rax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <32 x i16>
%load = load <8 x i64>, <8 x i64>* %__b
%1 = bitcast <8 x i64> %load to <32 x i16>
%2 = icmp sgt <32 x i16> %0, %1
%3 = bitcast i32 %__u to <32 x i1>
%4 = and <32 x i1> %2, %3
%5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
define zeroext i8 @test_vpcmpsgtd_v4i1_v8i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgtd_v4i1_v8i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpgtd %xmm1, %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgtd_v4i1_v8i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%1 = bitcast <2 x i64> %__b to <4 x i32>
%2 = icmp sgt <4 x i32> %0, %1
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <8 x i1> %3 to i8
ret i8 %4
}
define zeroext i8 @test_vpcmpsgtd_v4i1_v8i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgtd_v4i1_v8i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpgtd (%rdi), %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgtd_v4i1_v8i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpcmpgtd (%rdi), %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <4 x i32>
%2 = icmp sgt <4 x i32> %0, %1
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <8 x i1> %3 to i8
ret i8 %4
}
define zeroext i8 @test_masked_vpcmpsgtd_v4i1_v8i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgtd_v4i1_v8i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpgtd %xmm1, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v8i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
; NoVLX-NEXT: kshiftrw $15, %k3, %k3
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k3, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%1 = bitcast <2 x i64> %__b to <4 x i32>
%2 = icmp sgt <4 x i32> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%4 = and <4 x i1> %2, %extract.i
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <8 x i1> %5 to i8
ret i8 %6
}
define zeroext i8 @test_masked_vpcmpsgtd_v4i1_v8i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgtd_v4i1_v8i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpgtd (%rsi), %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v8i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpcmpgtd (%rsi), %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
; NoVLX-NEXT: kshiftrw $15, %k3, %k3
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k3, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <4 x i32>
%2 = icmp sgt <4 x i32> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%4 = and <4 x i1> %2, %extract.i
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <8 x i1> %5 to i8
ret i8 %6
}
define zeroext i8 @test_vpcmpsgtd_v4i1_v8i1_mask_mem_b(<2 x i64> %__a, i32* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgtd_v4i1_v8i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpgtd (%rdi){1to4}, %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgtd_v4i1_v8i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1
; NoVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%load = load i32, i32* %__b
%vec = insertelement <4 x i32> undef, i32 %load, i32 0
%1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
%2 = icmp sgt <4 x i32> %0, %1
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <8 x i1> %3 to i8
ret i8 %4
}
define zeroext i8 @test_masked_vpcmpsgtd_v4i1_v8i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i32* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgtd_v4i1_v8i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpgtd (%rsi){1to4}, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v8i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1
; NoVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
; NoVLX-NEXT: kshiftrw $15, %k3, %k3
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k3, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%load = load i32, i32* %__b
%vec = insertelement <4 x i32> undef, i32 %load, i32 0
%1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
%2 = icmp sgt <4 x i32> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%4 = and <4 x i1> %extract.i, %2
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <8 x i1> %5 to i8
ret i8 %6
}
define zeroext i16 @test_vpcmpsgtd_v4i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgtd_v4i1_v16i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpgtd %xmm1, %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgtd_v4i1_v16i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%1 = bitcast <2 x i64> %__b to <4 x i32>
%2 = icmp sgt <4 x i32> %0, %1
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <16 x i1> %3 to i16
ret i16 %4
}
define zeroext i16 @test_vpcmpsgtd_v4i1_v16i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgtd_v4i1_v16i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpgtd (%rdi), %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgtd_v4i1_v16i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpcmpgtd (%rdi), %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <4 x i32>
%2 = icmp sgt <4 x i32> %0, %1
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <16 x i1> %3 to i16
ret i16 %4
}
define zeroext i16 @test_masked_vpcmpsgtd_v4i1_v16i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgtd_v4i1_v16i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpgtd %xmm1, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v16i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
; NoVLX-NEXT: kshiftrw $15, %k3, %k3
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k3, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%1 = bitcast <2 x i64> %__b to <4 x i32>
%2 = icmp sgt <4 x i32> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%4 = and <4 x i1> %2, %extract.i
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <16 x i1> %5 to i16
ret i16 %6
}
define zeroext i16 @test_masked_vpcmpsgtd_v4i1_v16i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgtd_v4i1_v16i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpgtd (%rsi), %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v16i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpcmpgtd (%rsi), %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
; NoVLX-NEXT: kshiftrw $15, %k3, %k3
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k3, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <4 x i32>
%2 = icmp sgt <4 x i32> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%4 = and <4 x i1> %2, %extract.i
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <16 x i1> %5 to i16
ret i16 %6
}
define zeroext i16 @test_vpcmpsgtd_v4i1_v16i1_mask_mem_b(<2 x i64> %__a, i32* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgtd_v4i1_v16i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpgtd (%rdi){1to4}, %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgtd_v4i1_v16i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1
; NoVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%load = load i32, i32* %__b
%vec = insertelement <4 x i32> undef, i32 %load, i32 0
%1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
%2 = icmp sgt <4 x i32> %0, %1
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <16 x i1> %3 to i16
ret i16 %4
}
define zeroext i16 @test_masked_vpcmpsgtd_v4i1_v16i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i32* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgtd_v4i1_v16i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpgtd (%rsi){1to4}, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v16i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1
; NoVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
; NoVLX-NEXT: kshiftrw $15, %k3, %k3
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k3, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%load = load i32, i32* %__b
%vec = insertelement <4 x i32> undef, i32 %load, i32 0
%1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
%2 = icmp sgt <4 x i32> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%4 = and <4 x i1> %extract.i, %2
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <16 x i1> %5 to i16
ret i16 %6
}
define zeroext i32 @test_vpcmpsgtd_v4i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgtd_v4i1_v32i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpgtd %xmm1, %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgtd_v4i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi628:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi629:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi630:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%1 = bitcast <2 x i64> %__b to <4 x i32>
%2 = icmp sgt <4 x i32> %0, %1
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
define zeroext i32 @test_vpcmpsgtd_v4i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgtd_v4i1_v32i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpgtd (%rdi), %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgtd_v4i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi631:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi632:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi633:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpcmpgtd (%rdi), %xmm0, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <4 x i32>
%2 = icmp sgt <4 x i32> %0, %1
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
define zeroext i32 @test_masked_vpcmpsgtd_v4i1_v32i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgtd_v4i1_v32i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpgtd %xmm1, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi634:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi635:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi636:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
; NoVLX-NEXT: kshiftrw $15, %k3, %k3
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k3, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%1 = bitcast <2 x i64> %__b to <4 x i32>
%2 = icmp sgt <4 x i32> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%4 = and <4 x i1> %2, %extract.i
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
define zeroext i32 @test_masked_vpcmpsgtd_v4i1_v32i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgtd_v4i1_v32i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpgtd (%rsi), %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi637:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi638:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi639:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpcmpgtd (%rsi), %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
; NoVLX-NEXT: kshiftrw $15, %k3, %k3
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k3, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <4 x i32>
%2 = icmp sgt <4 x i32> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%4 = and <4 x i1> %2, %extract.i
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
define zeroext i32 @test_vpcmpsgtd_v4i1_v32i1_mask_mem_b(<2 x i64> %__a, i32* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgtd_v4i1_v32i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpgtd (%rdi){1to4}, %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgtd_v4i1_v32i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi640:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi641:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi642:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1
; NoVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%load = load i32, i32* %__b
%vec = insertelement <4 x i32> undef, i32 %load, i32 0
%1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
%2 = icmp sgt <4 x i32> %0, %1
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
define zeroext i32 @test_masked_vpcmpsgtd_v4i1_v32i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i32* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgtd_v4i1_v32i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpgtd (%rsi){1to4}, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v32i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi643:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi644:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi645:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1
; NoVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
; NoVLX-NEXT: kshiftrw $15, %k3, %k3
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k3, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%load = load i32, i32* %__b
%vec = insertelement <4 x i32> undef, i32 %load, i32 0
%1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
%2 = icmp sgt <4 x i32> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%4 = and <4 x i1> %extract.i, %2
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
define zeroext i64 @test_vpcmpsgtd_v4i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgtd_v4i1_v64i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpgtd %xmm1, %xmm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgtd_v4i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi646:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi647:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi648:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%1 = bitcast <2 x i64> %__b to <4 x i32>
%2 = icmp sgt <4 x i32> %0, %1
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
define zeroext i64 @test_vpcmpsgtd_v4i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgtd_v4i1_v64i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpgtd (%rdi), %xmm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgtd_v4i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi649:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi650:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi651:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpgtd (%rdi), %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <4 x i32>
%2 = icmp sgt <4 x i32> %0, %1
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
define zeroext i64 @test_masked_vpcmpsgtd_v4i1_v64i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgtd_v4i1_v64i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpgtd %xmm1, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi652:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi653:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi654:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
; NoVLX-NEXT: kshiftrw $15, %k3, %k3
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k3, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%1 = bitcast <2 x i64> %__b to <4 x i32>
%2 = icmp sgt <4 x i32> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%4 = and <4 x i1> %2, %extract.i
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
define zeroext i64 @test_masked_vpcmpsgtd_v4i1_v64i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgtd_v4i1_v64i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpgtd (%rsi), %xmm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi655:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi656:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi657:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpgtd (%rsi), %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
; NoVLX-NEXT: kshiftrw $15, %k3, %k3
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k3, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <4 x i32>
%2 = icmp sgt <4 x i32> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%4 = and <4 x i1> %2, %extract.i
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
define zeroext i64 @test_vpcmpsgtd_v4i1_v64i1_mask_mem_b(<2 x i64> %__a, i32* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgtd_v4i1_v64i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpgtd (%rdi){1to4}, %xmm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgtd_v4i1_v64i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi658:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi659:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi660:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1
; NoVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%load = load i32, i32* %__b
%vec = insertelement <4 x i32> undef, i32 %load, i32 0
%1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
%2 = icmp sgt <4 x i32> %0, %1
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
define zeroext i64 @test_masked_vpcmpsgtd_v4i1_v64i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i32* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgtd_v4i1_v64i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpgtd (%rsi){1to4}, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v64i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi661:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi662:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi663:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1
; NoVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
; NoVLX-NEXT: kshiftrw $15, %k3, %k3
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k3, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%load = load i32, i32* %__b
%vec = insertelement <4 x i32> undef, i32 %load, i32 0
%1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
%2 = icmp sgt <4 x i32> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%4 = and <4 x i1> %extract.i, %2
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
define zeroext i16 @test_vpcmpsgtd_v8i1_v16i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgtd_v8i1_v16i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpgtd %ymm1, %ymm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgtd_v8i1_v16i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: # kill: %YMM1<def> %YMM1<kill> %ZMM1<def>
; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $8, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
%1 = bitcast <4 x i64> %__b to <8 x i32>
%2 = icmp sgt <8 x i32> %0, %1
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%4 = bitcast <16 x i1> %3 to i16
ret i16 %4
}
define zeroext i16 @test_vpcmpsgtd_v8i1_v16i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgtd_v8i1_v16i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpgtd (%rdi), %ymm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgtd_v8i1_v16i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $8, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
%load = load <4 x i64>, <4 x i64>* %__b
%1 = bitcast <4 x i64> %load to <8 x i32>
%2 = icmp sgt <8 x i32> %0, %1
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%4 = bitcast <16 x i1> %3 to i16
ret i16 %4
}
define zeroext i16 @test_masked_vpcmpsgtd_v8i1_v16i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgtd_v8i1_v16i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpgtd %ymm1, %ymm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgtd_v8i1_v16i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: # kill: %YMM1<def> %YMM1<kill> %ZMM1<def>
; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $8, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
%1 = bitcast <4 x i64> %__b to <8 x i32>
%2 = icmp sgt <8 x i32> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %2, %3
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%6 = bitcast <16 x i1> %5 to i16
ret i16 %6
}
define zeroext i16 @test_masked_vpcmpsgtd_v8i1_v16i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgtd_v8i1_v16i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpgtd (%rsi), %ymm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgtd_v8i1_v16i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $8, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
%load = load <4 x i64>, <4 x i64>* %__b
%1 = bitcast <4 x i64> %load to <8 x i32>
%2 = icmp sgt <8 x i32> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %2, %3
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%6 = bitcast <16 x i1> %5 to i16
ret i16 %6
}
define zeroext i16 @test_vpcmpsgtd_v8i1_v16i1_mask_mem_b(<4 x i64> %__a, i32* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgtd_v8i1_v16i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpgtd (%rdi){1to8}, %ymm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgtd_v8i1_v16i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; NoVLX-NEXT: vpbroadcastd (%rdi), %ymm1
; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $8, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
%load = load i32, i32* %__b
%vec = insertelement <8 x i32> undef, i32 %load, i32 0
%1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
%2 = icmp sgt <8 x i32> %0, %1
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%4 = bitcast <16 x i1> %3 to i16
ret i16 %4
}
define zeroext i16 @test_masked_vpcmpsgtd_v8i1_v16i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i32* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgtd_v8i1_v16i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpgtd (%rsi){1to8}, %ymm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgtd_v8i1_v16i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; NoVLX-NEXT: vpbroadcastd (%rsi), %ymm1
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $8, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
%load = load i32, i32* %__b
%vec = insertelement <8 x i32> undef, i32 %load, i32 0
%1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
%2 = icmp sgt <8 x i32> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %3, %2
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%6 = bitcast <16 x i1> %5 to i16
ret i16 %6
}
define zeroext i32 @test_vpcmpsgtd_v8i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgtd_v8i1_v32i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpgtd %ymm1, %ymm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgtd_v8i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi664:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi665:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi666:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: # kill: %YMM1<def> %YMM1<kill> %ZMM1<def>
; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
%1 = bitcast <4 x i64> %__b to <8 x i32>
%2 = icmp sgt <8 x i32> %0, %1
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
define zeroext i32 @test_vpcmpsgtd_v8i1_v32i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgtd_v8i1_v32i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpgtd (%rdi), %ymm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgtd_v8i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi667:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi668:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi669:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
%load = load <4 x i64>, <4 x i64>* %__b
%1 = bitcast <4 x i64> %load to <8 x i32>
%2 = icmp sgt <8 x i32> %0, %1
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
define zeroext i32 @test_masked_vpcmpsgtd_v8i1_v32i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgtd_v8i1_v32i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpgtd %ymm1, %ymm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgtd_v8i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi670:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi671:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi672:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: # kill: %YMM1<def> %YMM1<kill> %ZMM1<def>
; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
%1 = bitcast <4 x i64> %__b to <8 x i32>
%2 = icmp sgt <8 x i32> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %2, %3
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
define zeroext i32 @test_masked_vpcmpsgtd_v8i1_v32i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgtd_v8i1_v32i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpgtd (%rsi), %ymm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgtd_v8i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi673:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi674:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi675:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
%load = load <4 x i64>, <4 x i64>* %__b
%1 = bitcast <4 x i64> %load to <8 x i32>
%2 = icmp sgt <8 x i32> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %2, %3
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
define zeroext i32 @test_vpcmpsgtd_v8i1_v32i1_mask_mem_b(<4 x i64> %__a, i32* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgtd_v8i1_v32i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpgtd (%rdi){1to8}, %ymm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgtd_v8i1_v32i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi676:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi677:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi678:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; NoVLX-NEXT: vpbroadcastd (%rdi), %ymm1
; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
%load = load i32, i32* %__b
%vec = insertelement <8 x i32> undef, i32 %load, i32 0
%1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
%2 = icmp sgt <8 x i32> %0, %1
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
define zeroext i32 @test_masked_vpcmpsgtd_v8i1_v32i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i32* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgtd_v8i1_v32i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpgtd (%rsi){1to8}, %ymm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgtd_v8i1_v32i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi679:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi680:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi681:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; NoVLX-NEXT: vpbroadcastd (%rsi), %ymm1
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
%load = load i32, i32* %__b
%vec = insertelement <8 x i32> undef, i32 %load, i32 0
%1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
%2 = icmp sgt <8 x i32> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %3, %2
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
define zeroext i64 @test_vpcmpsgtd_v8i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgtd_v8i1_v64i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpgtd %ymm1, %ymm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgtd_v8i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi682:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi683:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi684:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: # kill: %YMM1<def> %YMM1<kill> %ZMM1<def>
; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
%1 = bitcast <4 x i64> %__b to <8 x i32>
%2 = icmp sgt <8 x i32> %0, %1
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
define zeroext i64 @test_vpcmpsgtd_v8i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgtd_v8i1_v64i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpgtd (%rdi), %ymm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgtd_v8i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi685:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi686:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi687:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
%load = load <4 x i64>, <4 x i64>* %__b
%1 = bitcast <4 x i64> %load to <8 x i32>
%2 = icmp sgt <8 x i32> %0, %1
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
define zeroext i64 @test_masked_vpcmpsgtd_v8i1_v64i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgtd_v8i1_v64i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpgtd %ymm1, %ymm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgtd_v8i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi688:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi689:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi690:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: # kill: %YMM1<def> %YMM1<kill> %ZMM1<def>
; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
%1 = bitcast <4 x i64> %__b to <8 x i32>
%2 = icmp sgt <8 x i32> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %2, %3
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
define zeroext i64 @test_masked_vpcmpsgtd_v8i1_v64i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgtd_v8i1_v64i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpgtd (%rsi), %ymm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgtd_v8i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi691:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi692:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi693:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
%load = load <4 x i64>, <4 x i64>* %__b
%1 = bitcast <4 x i64> %load to <8 x i32>
%2 = icmp sgt <8 x i32> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %2, %3
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
define zeroext i64 @test_vpcmpsgtd_v8i1_v64i1_mask_mem_b(<4 x i64> %__a, i32* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgtd_v8i1_v64i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpgtd (%rdi){1to8}, %ymm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgtd_v8i1_v64i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi694:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi695:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi696:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; NoVLX-NEXT: vpbroadcastd (%rdi), %ymm1
; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
%load = load i32, i32* %__b
%vec = insertelement <8 x i32> undef, i32 %load, i32 0
%1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
%2 = icmp sgt <8 x i32> %0, %1
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
define zeroext i64 @test_masked_vpcmpsgtd_v8i1_v64i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i32* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgtd_v8i1_v64i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpgtd (%rsi){1to8}, %ymm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgtd_v8i1_v64i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi697:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi698:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi699:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; NoVLX-NEXT: vpbroadcastd (%rsi), %ymm1
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
%load = load i32, i32* %__b
%vec = insertelement <8 x i32> undef, i32 %load, i32 0
%1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
%2 = icmp sgt <8 x i32> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %3, %2
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
define zeroext i32 @test_vpcmpsgtd_v16i1_v32i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgtd_v16i1_v32i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgtd_v16i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi700:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi701:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi702:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %r13
; NoVLX-NEXT: pushq %r12
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: .Lcfi703:
; NoVLX-NEXT: .cfi_offset %rbx, -56
; NoVLX-NEXT: .Lcfi704:
; NoVLX-NEXT: .cfi_offset %r12, -48
; NoVLX-NEXT: .Lcfi705:
; NoVLX-NEXT: .cfi_offset %r13, -40
; NoVLX-NEXT: .Lcfi706:
; NoVLX-NEXT: .cfi_offset %r14, -32
; NoVLX-NEXT: .Lcfi707:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: leaq -40(%rbp), %rsp
; NoVLX-NEXT: popq %rbx
; NoVLX-NEXT: popq %r12
; NoVLX-NEXT: popq %r13
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x i32>
%1 = bitcast <8 x i64> %__b to <16 x i32>
%2 = icmp sgt <16 x i32> %0, %1
%3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
define zeroext i32 @test_vpcmpsgtd_v16i1_v32i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgtd_v16i1_v32i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpgtd (%rdi), %zmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgtd_v16i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi708:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi709:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi710:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %r13
; NoVLX-NEXT: pushq %r12
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: .Lcfi711:
; NoVLX-NEXT: .cfi_offset %rbx, -56
; NoVLX-NEXT: .Lcfi712:
; NoVLX-NEXT: .cfi_offset %r12, -48
; NoVLX-NEXT: .Lcfi713:
; NoVLX-NEXT: .cfi_offset %r13, -40
; NoVLX-NEXT: .Lcfi714:
; NoVLX-NEXT: .cfi_offset %r14, -32
; NoVLX-NEXT: .Lcfi715:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpgtd (%rdi), %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: leaq -40(%rbp), %rsp
; NoVLX-NEXT: popq %rbx
; NoVLX-NEXT: popq %r12
; NoVLX-NEXT: popq %r13
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x i32>
%load = load <8 x i64>, <8 x i64>* %__b
%1 = bitcast <8 x i64> %load to <16 x i32>
%2 = icmp sgt <16 x i32> %0, %1
%3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
define zeroext i32 @test_masked_vpcmpsgtd_v16i1_v32i1_mask(i16 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgtd_v16i1_v32i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgtd_v16i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi716:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi717:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi718:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %r13
; NoVLX-NEXT: pushq %r12
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: .Lcfi719:
; NoVLX-NEXT: .cfi_offset %rbx, -56
; NoVLX-NEXT: .Lcfi720:
; NoVLX-NEXT: .cfi_offset %r12, -48
; NoVLX-NEXT: .Lcfi721:
; NoVLX-NEXT: .cfi_offset %r13, -40
; NoVLX-NEXT: .Lcfi722:
; NoVLX-NEXT: .cfi_offset %r14, -32
; NoVLX-NEXT: .Lcfi723:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: leaq -40(%rbp), %rsp
; NoVLX-NEXT: popq %rbx
; NoVLX-NEXT: popq %r12
; NoVLX-NEXT: popq %r13
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x i32>
%1 = bitcast <8 x i64> %__b to <16 x i32>
%2 = icmp sgt <16 x i32> %0, %1
%3 = bitcast i16 %__u to <16 x i1>
%4 = and <16 x i1> %2, %3
%5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
define zeroext i32 @test_masked_vpcmpsgtd_v16i1_v32i1_mask_mem(i16 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgtd_v16i1_v32i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpgtd (%rsi), %zmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgtd_v16i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi724:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi725:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi726:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %r13
; NoVLX-NEXT: pushq %r12
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: .Lcfi727:
; NoVLX-NEXT: .cfi_offset %rbx, -56
; NoVLX-NEXT: .Lcfi728:
; NoVLX-NEXT: .cfi_offset %r12, -48
; NoVLX-NEXT: .Lcfi729:
; NoVLX-NEXT: .cfi_offset %r13, -40
; NoVLX-NEXT: .Lcfi730:
; NoVLX-NEXT: .cfi_offset %r14, -32
; NoVLX-NEXT: .Lcfi731:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpgtd (%rsi), %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: leaq -40(%rbp), %rsp
; NoVLX-NEXT: popq %rbx
; NoVLX-NEXT: popq %r12
; NoVLX-NEXT: popq %r13
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x i32>
%load = load <8 x i64>, <8 x i64>* %__b
%1 = bitcast <8 x i64> %load to <16 x i32>
%2 = icmp sgt <16 x i32> %0, %1
%3 = bitcast i16 %__u to <16 x i1>
%4 = and <16 x i1> %2, %3
%5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
define zeroext i32 @test_vpcmpsgtd_v16i1_v32i1_mask_mem_b(<8 x i64> %__a, i32* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgtd_v16i1_v32i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpgtd (%rdi){1to16}, %zmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgtd_v16i1_v32i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi732:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi733:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi734:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %r13
; NoVLX-NEXT: pushq %r12
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: .Lcfi735:
; NoVLX-NEXT: .cfi_offset %rbx, -56
; NoVLX-NEXT: .Lcfi736:
; NoVLX-NEXT: .cfi_offset %r12, -48
; NoVLX-NEXT: .Lcfi737:
; NoVLX-NEXT: .cfi_offset %r13, -40
; NoVLX-NEXT: .Lcfi738:
; NoVLX-NEXT: .cfi_offset %r14, -32
; NoVLX-NEXT: .Lcfi739:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpgtd (%rdi){1to16}, %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: leaq -40(%rbp), %rsp
; NoVLX-NEXT: popq %rbx
; NoVLX-NEXT: popq %r12
; NoVLX-NEXT: popq %r13
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x i32>
%load = load i32, i32* %__b
%vec = insertelement <16 x i32> undef, i32 %load, i32 0
%1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
%2 = icmp sgt <16 x i32> %0, %1
%3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
define zeroext i32 @test_masked_vpcmpsgtd_v16i1_v32i1_mask_mem_b(i16 zeroext %__u, <8 x i64> %__a, i32* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgtd_v16i1_v32i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpgtd (%rsi){1to16}, %zmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgtd_v16i1_v32i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi740:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi741:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi742:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %r13
; NoVLX-NEXT: pushq %r12
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: .Lcfi743:
; NoVLX-NEXT: .cfi_offset %rbx, -56
; NoVLX-NEXT: .Lcfi744:
; NoVLX-NEXT: .cfi_offset %r12, -48
; NoVLX-NEXT: .Lcfi745:
; NoVLX-NEXT: .cfi_offset %r13, -40
; NoVLX-NEXT: .Lcfi746:
; NoVLX-NEXT: .cfi_offset %r14, -32
; NoVLX-NEXT: .Lcfi747:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpgtd (%rsi){1to16}, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: leaq -40(%rbp), %rsp
; NoVLX-NEXT: popq %rbx
; NoVLX-NEXT: popq %r12
; NoVLX-NEXT: popq %r13
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x i32>
%load = load i32, i32* %__b
%vec = insertelement <16 x i32> undef, i32 %load, i32 0
%1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
%2 = icmp sgt <16 x i32> %0, %1
%3 = bitcast i16 %__u to <16 x i1>
%4 = and <16 x i1> %3, %2
%5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
define zeroext i64 @test_vpcmpsgtd_v16i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgtd_v16i1_v64i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgtd_v16i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi748:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi749:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi750:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %r13
; NoVLX-NEXT: pushq %r12
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: .Lcfi751:
; NoVLX-NEXT: .cfi_offset %rbx, -56
; NoVLX-NEXT: .Lcfi752:
; NoVLX-NEXT: .cfi_offset %r12, -48
; NoVLX-NEXT: .Lcfi753:
; NoVLX-NEXT: .cfi_offset %r13, -40
; NoVLX-NEXT: .Lcfi754:
; NoVLX-NEXT: .cfi_offset %r14, -32
; NoVLX-NEXT: .Lcfi755:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: leaq -40(%rbp), %rsp
; NoVLX-NEXT: popq %rbx
; NoVLX-NEXT: popq %r12
; NoVLX-NEXT: popq %r13
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x i32>
%1 = bitcast <8 x i64> %__b to <16 x i32>
%2 = icmp sgt <16 x i32> %0, %1
%3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
define zeroext i64 @test_vpcmpsgtd_v16i1_v64i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgtd_v16i1_v64i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpgtd (%rdi), %zmm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgtd_v16i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi756:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi757:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi758:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %r13
; NoVLX-NEXT: pushq %r12
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: .Lcfi759:
; NoVLX-NEXT: .cfi_offset %rbx, -56
; NoVLX-NEXT: .Lcfi760:
; NoVLX-NEXT: .cfi_offset %r12, -48
; NoVLX-NEXT: .Lcfi761:
; NoVLX-NEXT: .cfi_offset %r13, -40
; NoVLX-NEXT: .Lcfi762:
; NoVLX-NEXT: .cfi_offset %r14, -32
; NoVLX-NEXT: .Lcfi763:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpgtd (%rdi), %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: leaq -40(%rbp), %rsp
; NoVLX-NEXT: popq %rbx
; NoVLX-NEXT: popq %r12
; NoVLX-NEXT: popq %r13
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x i32>
%load = load <8 x i64>, <8 x i64>* %__b
%1 = bitcast <8 x i64> %load to <16 x i32>
%2 = icmp sgt <16 x i32> %0, %1
%3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
define zeroext i64 @test_masked_vpcmpsgtd_v16i1_v64i1_mask(i16 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgtd_v16i1_v64i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgtd_v16i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi764:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi765:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi766:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %r13
; NoVLX-NEXT: pushq %r12
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: .Lcfi767:
; NoVLX-NEXT: .cfi_offset %rbx, -56
; NoVLX-NEXT: .Lcfi768:
; NoVLX-NEXT: .cfi_offset %r12, -48
; NoVLX-NEXT: .Lcfi769:
; NoVLX-NEXT: .cfi_offset %r13, -40
; NoVLX-NEXT: .Lcfi770:
; NoVLX-NEXT: .cfi_offset %r14, -32
; NoVLX-NEXT: .Lcfi771:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: leaq -40(%rbp), %rsp
; NoVLX-NEXT: popq %rbx
; NoVLX-NEXT: popq %r12
; NoVLX-NEXT: popq %r13
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x i32>
%1 = bitcast <8 x i64> %__b to <16 x i32>
%2 = icmp sgt <16 x i32> %0, %1
%3 = bitcast i16 %__u to <16 x i1>
%4 = and <16 x i1> %2, %3
%5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
define zeroext i64 @test_masked_vpcmpsgtd_v16i1_v64i1_mask_mem(i16 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgtd_v16i1_v64i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpgtd (%rsi), %zmm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgtd_v16i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi772:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi773:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi774:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %r13
; NoVLX-NEXT: pushq %r12
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: .Lcfi775:
; NoVLX-NEXT: .cfi_offset %rbx, -56
; NoVLX-NEXT: .Lcfi776:
; NoVLX-NEXT: .cfi_offset %r12, -48
; NoVLX-NEXT: .Lcfi777:
; NoVLX-NEXT: .cfi_offset %r13, -40
; NoVLX-NEXT: .Lcfi778:
; NoVLX-NEXT: .cfi_offset %r14, -32
; NoVLX-NEXT: .Lcfi779:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpgtd (%rsi), %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: leaq -40(%rbp), %rsp
; NoVLX-NEXT: popq %rbx
; NoVLX-NEXT: popq %r12
; NoVLX-NEXT: popq %r13
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x i32>
%load = load <8 x i64>, <8 x i64>* %__b
%1 = bitcast <8 x i64> %load to <16 x i32>
%2 = icmp sgt <16 x i32> %0, %1
%3 = bitcast i16 %__u to <16 x i1>
%4 = and <16 x i1> %2, %3
%5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
define zeroext i64 @test_vpcmpsgtd_v16i1_v64i1_mask_mem_b(<8 x i64> %__a, i32* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgtd_v16i1_v64i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpgtd (%rdi){1to16}, %zmm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgtd_v16i1_v64i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi780:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi781:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi782:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %r13
; NoVLX-NEXT: pushq %r12
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: .Lcfi783:
; NoVLX-NEXT: .cfi_offset %rbx, -56
; NoVLX-NEXT: .Lcfi784:
; NoVLX-NEXT: .cfi_offset %r12, -48
; NoVLX-NEXT: .Lcfi785:
; NoVLX-NEXT: .cfi_offset %r13, -40
; NoVLX-NEXT: .Lcfi786:
; NoVLX-NEXT: .cfi_offset %r14, -32
; NoVLX-NEXT: .Lcfi787:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpgtd (%rdi){1to16}, %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: leaq -40(%rbp), %rsp
; NoVLX-NEXT: popq %rbx
; NoVLX-NEXT: popq %r12
; NoVLX-NEXT: popq %r13
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x i32>
%load = load i32, i32* %__b
%vec = insertelement <16 x i32> undef, i32 %load, i32 0
%1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
%2 = icmp sgt <16 x i32> %0, %1
%3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
define zeroext i64 @test_masked_vpcmpsgtd_v16i1_v64i1_mask_mem_b(i16 zeroext %__u, <8 x i64> %__a, i32* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgtd_v16i1_v64i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpgtd (%rsi){1to16}, %zmm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgtd_v16i1_v64i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi788:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi789:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi790:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %r13
; NoVLX-NEXT: pushq %r12
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: .Lcfi791:
; NoVLX-NEXT: .cfi_offset %rbx, -56
; NoVLX-NEXT: .Lcfi792:
; NoVLX-NEXT: .cfi_offset %r12, -48
; NoVLX-NEXT: .Lcfi793:
; NoVLX-NEXT: .cfi_offset %r13, -40
; NoVLX-NEXT: .Lcfi794:
; NoVLX-NEXT: .cfi_offset %r14, -32
; NoVLX-NEXT: .Lcfi795:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpgtd (%rsi){1to16}, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: leaq -40(%rbp), %rsp
; NoVLX-NEXT: popq %rbx
; NoVLX-NEXT: popq %r12
; NoVLX-NEXT: popq %r13
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x i32>
%load = load i32, i32* %__b
%vec = insertelement <16 x i32> undef, i32 %load, i32 0
%1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
%2 = icmp sgt <16 x i32> %0, %1
%3 = bitcast i16 %__u to <16 x i1>
%4 = and <16 x i1> %3, %2
%5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
define zeroext i4 @test_vpcmpsgtq_v2i1_v4i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgtq_v2i1_v4i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpgtq %xmm1, %xmm0, %k0
; VLX-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp)
; VLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v4i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%1 = bitcast <2 x i64> %__b to <2 x i64>
%2 = icmp sgt <2 x i64> %0, %1
%3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%4 = bitcast <4 x i1> %3 to i4
ret i4 %4
}
define zeroext i4 @test_vpcmpsgtq_v2i1_v4i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgtq_v2i1_v4i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpgtq (%rdi), %xmm0, %k0
; VLX-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp)
; VLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v4i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpcmpgtq (%rdi), %xmm0, %xmm0
; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <2 x i64>
%2 = icmp sgt <2 x i64> %0, %1
%3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%4 = bitcast <4 x i1> %3 to i4
ret i4 %4
}
define zeroext i4 @test_masked_vpcmpsgtq_v2i1_v4i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgtq_v2i1_v4i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpgtq %xmm1, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp)
; VLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v4i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%1 = bitcast <2 x i64> %__b to <2 x i64>
%2 = icmp sgt <2 x i64> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
%4 = and <2 x i1> %2, %extract.i
%5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%6 = bitcast <4 x i1> %5 to i4
ret i4 %6
}
define zeroext i4 @test_masked_vpcmpsgtq_v2i1_v4i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgtq_v2i1_v4i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpgtq (%rsi), %xmm0, %k0 {%k1}
; VLX-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp)
; VLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v4i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpcmpgtq (%rsi), %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <2 x i64>
%2 = icmp sgt <2 x i64> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
%4 = and <2 x i1> %2, %extract.i
%5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%6 = bitcast <4 x i1> %5 to i4
ret i4 %6
}
define zeroext i4 @test_vpcmpsgtq_v2i1_v4i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgtq_v2i1_v4i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpgtq (%rdi){1to2}, %xmm0, %k0
; VLX-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp)
; VLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v4i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1
; NoVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%load = load i64, i64* %__b
%vec = insertelement <2 x i64> undef, i64 %load, i32 0
%1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
%2 = icmp sgt <2 x i64> %0, %1
%3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%4 = bitcast <4 x i1> %3 to i4
ret i4 %4
}
define zeroext i4 @test_masked_vpcmpsgtq_v2i1_v4i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgtq_v2i1_v4i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpgtq (%rsi){1to2}, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp)
; VLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v4i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1
; NoVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%load = load i64, i64* %__b
%vec = insertelement <2 x i64> undef, i64 %load, i32 0
%1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
%2 = icmp sgt <2 x i64> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
%4 = and <2 x i1> %extract.i, %2
%5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%6 = bitcast <4 x i1> %5 to i4
ret i4 %6
}
define zeroext i8 @test_vpcmpsgtq_v2i1_v8i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgtq_v2i1_v8i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpgtq %xmm1, %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v8i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,8,2,3,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%1 = bitcast <2 x i64> %__b to <2 x i64>
%2 = icmp sgt <2 x i64> %0, %1
%3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%4 = bitcast <8 x i1> %3 to i8
ret i8 %4
}
define zeroext i8 @test_vpcmpsgtq_v2i1_v8i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgtq_v2i1_v8i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpgtq (%rdi), %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v8i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpcmpgtq (%rdi), %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,8,2,3,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <2 x i64>
%2 = icmp sgt <2 x i64> %0, %1
%3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%4 = bitcast <8 x i1> %3 to i8
ret i8 %4
}
define zeroext i8 @test_masked_vpcmpsgtq_v2i1_v8i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgtq_v2i1_v8i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpgtq %xmm1, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v8i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,8,2,3,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%1 = bitcast <2 x i64> %__b to <2 x i64>
%2 = icmp sgt <2 x i64> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
%4 = and <2 x i1> %2, %extract.i
%5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%6 = bitcast <8 x i1> %5 to i8
ret i8 %6
}
define zeroext i8 @test_masked_vpcmpsgtq_v2i1_v8i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgtq_v2i1_v8i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpgtq (%rsi), %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v8i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpcmpgtq (%rsi), %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,8,2,3,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <2 x i64>
%2 = icmp sgt <2 x i64> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
%4 = and <2 x i1> %2, %extract.i
%5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%6 = bitcast <8 x i1> %5 to i8
ret i8 %6
}
define zeroext i8 @test_vpcmpsgtq_v2i1_v8i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgtq_v2i1_v8i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpgtq (%rdi){1to2}, %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v8i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1
; NoVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,8,2,3,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%load = load i64, i64* %__b
%vec = insertelement <2 x i64> undef, i64 %load, i32 0
%1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
%2 = icmp sgt <2 x i64> %0, %1
%3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%4 = bitcast <8 x i1> %3 to i8
ret i8 %4
}
define zeroext i8 @test_masked_vpcmpsgtq_v2i1_v8i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgtq_v2i1_v8i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpgtq (%rsi){1to2}, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v8i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1
; NoVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,8,2,3,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%load = load i64, i64* %__b
%vec = insertelement <2 x i64> undef, i64 %load, i32 0
%1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
%2 = icmp sgt <2 x i64> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
%4 = and <2 x i1> %extract.i, %2
%5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%6 = bitcast <8 x i1> %5 to i8
ret i8 %6
}
define zeroext i16 @test_vpcmpsgtq_v2i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgtq_v2i1_v16i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpgtq %xmm1, %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v16i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm1, %zmm0, %zmm2
; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%1 = bitcast <2 x i64> %__b to <2 x i64>
%2 = icmp sgt <2 x i64> %0, %1
%3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%4 = bitcast <16 x i1> %3 to i16
ret i16 %4
}
define zeroext i16 @test_vpcmpsgtq_v2i1_v16i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgtq_v2i1_v16i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpgtq (%rdi), %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v16i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpcmpgtq (%rdi), %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm1, %zmm0, %zmm2
; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <2 x i64>
%2 = icmp sgt <2 x i64> %0, %1
%3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%4 = bitcast <16 x i1> %3 to i16
ret i16 %4
}
define zeroext i16 @test_masked_vpcmpsgtq_v2i1_v16i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgtq_v2i1_v16i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpgtq %xmm1, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v16i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm1, %zmm0, %zmm2
; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%1 = bitcast <2 x i64> %__b to <2 x i64>
%2 = icmp sgt <2 x i64> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
%4 = and <2 x i1> %2, %extract.i
%5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%6 = bitcast <16 x i1> %5 to i16
ret i16 %6
}
define zeroext i16 @test_masked_vpcmpsgtq_v2i1_v16i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgtq_v2i1_v16i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpgtq (%rsi), %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v16i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpcmpgtq (%rsi), %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm1, %zmm0, %zmm2
; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <2 x i64>
%2 = icmp sgt <2 x i64> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
%4 = and <2 x i1> %2, %extract.i
%5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%6 = bitcast <16 x i1> %5 to i16
ret i16 %6
}
define zeroext i16 @test_vpcmpsgtq_v2i1_v16i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgtq_v2i1_v16i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpgtq (%rdi){1to2}, %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v16i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1
; NoVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm1, %zmm0, %zmm2
; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%load = load i64, i64* %__b
%vec = insertelement <2 x i64> undef, i64 %load, i32 0
%1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
%2 = icmp sgt <2 x i64> %0, %1
%3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%4 = bitcast <16 x i1> %3 to i16
ret i16 %4
}
define zeroext i16 @test_masked_vpcmpsgtq_v2i1_v16i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgtq_v2i1_v16i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpgtq (%rsi){1to2}, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v16i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1
; NoVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm1, %zmm0, %zmm2
; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%load = load i64, i64* %__b
%vec = insertelement <2 x i64> undef, i64 %load, i32 0
%1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
%2 = icmp sgt <2 x i64> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
%4 = and <2 x i1> %extract.i, %2
%5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%6 = bitcast <16 x i1> %5 to i16
ret i16 %6
}
define zeroext i32 @test_vpcmpsgtq_v2i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgtq_v2i1_v32i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpgtq %xmm1, %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi796:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi797:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi798:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%1 = bitcast <2 x i64> %__b to <2 x i64>
%2 = icmp sgt <2 x i64> %0, %1
%3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
define zeroext i32 @test_vpcmpsgtq_v2i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgtq_v2i1_v32i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpgtq (%rdi), %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi799:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi800:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi801:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpcmpgtq (%rdi), %xmm0, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <2 x i64>
%2 = icmp sgt <2 x i64> %0, %1
%3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
define zeroext i32 @test_masked_vpcmpsgtq_v2i1_v32i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgtq_v2i1_v32i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpgtq %xmm1, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi802:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi803:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi804:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%1 = bitcast <2 x i64> %__b to <2 x i64>
%2 = icmp sgt <2 x i64> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
%4 = and <2 x i1> %2, %extract.i
%5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
define zeroext i32 @test_masked_vpcmpsgtq_v2i1_v32i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgtq_v2i1_v32i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpgtq (%rsi), %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi805:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi806:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi807:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpcmpgtq (%rsi), %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <2 x i64>
%2 = icmp sgt <2 x i64> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
%4 = and <2 x i1> %2, %extract.i
%5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
define zeroext i32 @test_vpcmpsgtq_v2i1_v32i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgtq_v2i1_v32i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpgtq (%rdi){1to2}, %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v32i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi808:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi809:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi810:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1
; NoVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%load = load i64, i64* %__b
%vec = insertelement <2 x i64> undef, i64 %load, i32 0
%1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
%2 = icmp sgt <2 x i64> %0, %1
%3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
define zeroext i32 @test_masked_vpcmpsgtq_v2i1_v32i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgtq_v2i1_v32i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpgtq (%rsi){1to2}, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v32i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi811:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi812:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi813:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1
; NoVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%load = load i64, i64* %__b
%vec = insertelement <2 x i64> undef, i64 %load, i32 0
%1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
%2 = icmp sgt <2 x i64> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
%4 = and <2 x i1> %extract.i, %2
%5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
define zeroext i64 @test_vpcmpsgtq_v2i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgtq_v2i1_v64i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpgtq %xmm1, %xmm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi814:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi815:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi816:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%1 = bitcast <2 x i64> %__b to <2 x i64>
%2 = icmp sgt <2 x i64> %0, %1
%3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
define zeroext i64 @test_vpcmpsgtq_v2i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgtq_v2i1_v64i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpgtq (%rdi), %xmm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi817:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi818:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi819:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpgtq (%rdi), %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <2 x i64>
%2 = icmp sgt <2 x i64> %0, %1
%3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
define zeroext i64 @test_masked_vpcmpsgtq_v2i1_v64i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgtq_v2i1_v64i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpgtq %xmm1, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi820:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi821:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi822:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%1 = bitcast <2 x i64> %__b to <2 x i64>
%2 = icmp sgt <2 x i64> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
%4 = and <2 x i1> %2, %extract.i
%5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
define zeroext i64 @test_masked_vpcmpsgtq_v2i1_v64i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgtq_v2i1_v64i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpgtq (%rsi), %xmm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi823:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi824:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi825:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpgtq (%rsi), %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <2 x i64>
%2 = icmp sgt <2 x i64> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
%4 = and <2 x i1> %2, %extract.i
%5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
define zeroext i64 @test_vpcmpsgtq_v2i1_v64i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgtq_v2i1_v64i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpgtq (%rdi){1to2}, %xmm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v64i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi826:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi827:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi828:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1
; NoVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%load = load i64, i64* %__b
%vec = insertelement <2 x i64> undef, i64 %load, i32 0
%1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
%2 = icmp sgt <2 x i64> %0, %1
%3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
define zeroext i64 @test_masked_vpcmpsgtq_v2i1_v64i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgtq_v2i1_v64i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpgtq (%rsi){1to2}, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v64i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi829:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi830:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi831:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1
; NoVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%load = load i64, i64* %__b
%vec = insertelement <2 x i64> undef, i64 %load, i32 0
%1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
%2 = icmp sgt <2 x i64> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
%4 = and <2 x i1> %extract.i, %2
%5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
define zeroext i8 @test_vpcmpsgtq_v4i1_v8i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgtq_v4i1_v8i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpgtq %ymm1, %ymm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgtq_v4i1_v8i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
%1 = bitcast <4 x i64> %__b to <4 x i64>
%2 = icmp sgt <4 x i64> %0, %1
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <8 x i1> %3 to i8
ret i8 %4
}
define zeroext i8 @test_vpcmpsgtq_v4i1_v8i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgtq_v4i1_v8i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpgtq (%rdi), %ymm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgtq_v4i1_v8i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpcmpgtq (%rdi), %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
%load = load <4 x i64>, <4 x i64>* %__b
%1 = bitcast <4 x i64> %load to <4 x i64>
%2 = icmp sgt <4 x i64> %0, %1
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <8 x i1> %3 to i8
ret i8 %4
}
define zeroext i8 @test_masked_vpcmpsgtq_v4i1_v8i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgtq_v4i1_v8i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpgtq %ymm1, %ymm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v8i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
; NoVLX-NEXT: kshiftrw $15, %k3, %k3
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k3, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
%1 = bitcast <4 x i64> %__b to <4 x i64>
%2 = icmp sgt <4 x i64> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%4 = and <4 x i1> %2, %extract.i
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <8 x i1> %5 to i8
ret i8 %6
}
define zeroext i8 @test_masked_vpcmpsgtq_v4i1_v8i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgtq_v4i1_v8i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpgtq (%rsi), %ymm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v8i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpcmpgtq (%rsi), %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
; NoVLX-NEXT: kshiftrw $15, %k3, %k3
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k3, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
%load = load <4 x i64>, <4 x i64>* %__b
%1 = bitcast <4 x i64> %load to <4 x i64>
%2 = icmp sgt <4 x i64> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%4 = and <4 x i1> %2, %extract.i
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <8 x i1> %5 to i8
ret i8 %6
}
define zeroext i8 @test_vpcmpsgtq_v4i1_v8i1_mask_mem_b(<4 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgtq_v4i1_v8i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpgtq (%rdi){1to4}, %ymm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgtq_v4i1_v8i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1
; NoVLX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
%load = load i64, i64* %__b
%vec = insertelement <4 x i64> undef, i64 %load, i32 0
%1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
%2 = icmp sgt <4 x i64> %0, %1
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <8 x i1> %3 to i8
ret i8 %4
}
define zeroext i8 @test_masked_vpcmpsgtq_v4i1_v8i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgtq_v4i1_v8i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpgtq (%rsi){1to4}, %ymm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v8i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1
; NoVLX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
; NoVLX-NEXT: kshiftrw $15, %k3, %k3
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k3, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
%load = load i64, i64* %__b
%vec = insertelement <4 x i64> undef, i64 %load, i32 0
%1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
%2 = icmp sgt <4 x i64> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%4 = and <4 x i1> %extract.i, %2
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <8 x i1> %5 to i8
ret i8 %6
}
define zeroext i16 @test_vpcmpsgtq_v4i1_v16i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgtq_v4i1_v16i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpgtq %ymm1, %ymm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgtq_v4i1_v16i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
%1 = bitcast <4 x i64> %__b to <4 x i64>
%2 = icmp sgt <4 x i64> %0, %1
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <16 x i1> %3 to i16
ret i16 %4
}
define zeroext i16 @test_vpcmpsgtq_v4i1_v16i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgtq_v4i1_v16i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpgtq (%rdi), %ymm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgtq_v4i1_v16i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpcmpgtq (%rdi), %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
%load = load <4 x i64>, <4 x i64>* %__b
%1 = bitcast <4 x i64> %load to <4 x i64>
%2 = icmp sgt <4 x i64> %0, %1
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <16 x i1> %3 to i16
ret i16 %4
}
define zeroext i16 @test_masked_vpcmpsgtq_v4i1_v16i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgtq_v4i1_v16i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpgtq %ymm1, %ymm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v16i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
; NoVLX-NEXT: kshiftrw $15, %k3, %k3
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k3, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
%1 = bitcast <4 x i64> %__b to <4 x i64>
%2 = icmp sgt <4 x i64> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%4 = and <4 x i1> %2, %extract.i
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <16 x i1> %5 to i16
ret i16 %6
}
define zeroext i16 @test_masked_vpcmpsgtq_v4i1_v16i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgtq_v4i1_v16i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpgtq (%rsi), %ymm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v16i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpcmpgtq (%rsi), %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
; NoVLX-NEXT: kshiftrw $15, %k3, %k3
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k3, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
%load = load <4 x i64>, <4 x i64>* %__b
%1 = bitcast <4 x i64> %load to <4 x i64>
%2 = icmp sgt <4 x i64> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%4 = and <4 x i1> %2, %extract.i
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <16 x i1> %5 to i16
ret i16 %6
}
define zeroext i16 @test_vpcmpsgtq_v4i1_v16i1_mask_mem_b(<4 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgtq_v4i1_v16i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpgtq (%rdi){1to4}, %ymm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgtq_v4i1_v16i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1
; NoVLX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
%load = load i64, i64* %__b
%vec = insertelement <4 x i64> undef, i64 %load, i32 0
%1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
%2 = icmp sgt <4 x i64> %0, %1
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <16 x i1> %3 to i16
ret i16 %4
}
define zeroext i16 @test_masked_vpcmpsgtq_v4i1_v16i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgtq_v4i1_v16i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpgtq (%rsi){1to4}, %ymm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v16i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1
; NoVLX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
; NoVLX-NEXT: kshiftrw $15, %k3, %k3
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k3, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
%load = load i64, i64* %__b
%vec = insertelement <4 x i64> undef, i64 %load, i32 0
%1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
%2 = icmp sgt <4 x i64> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%4 = and <4 x i1> %extract.i, %2
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <16 x i1> %5 to i16
ret i16 %6
}
define zeroext i32 @test_vpcmpsgtq_v4i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgtq_v4i1_v32i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpgtq %ymm1, %ymm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgtq_v4i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi832:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi833:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi834:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
%1 = bitcast <4 x i64> %__b to <4 x i64>
%2 = icmp sgt <4 x i64> %0, %1
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
define zeroext i32 @test_vpcmpsgtq_v4i1_v32i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgtq_v4i1_v32i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpgtq (%rdi), %ymm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgtq_v4i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi835:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi836:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi837:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpcmpgtq (%rdi), %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
%load = load <4 x i64>, <4 x i64>* %__b
%1 = bitcast <4 x i64> %load to <4 x i64>
%2 = icmp sgt <4 x i64> %0, %1
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
define zeroext i32 @test_masked_vpcmpsgtq_v4i1_v32i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgtq_v4i1_v32i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpgtq %ymm1, %ymm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi838:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi839:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi840:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
; NoVLX-NEXT: kshiftrw $15, %k3, %k3
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k3, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
%1 = bitcast <4 x i64> %__b to <4 x i64>
%2 = icmp sgt <4 x i64> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%4 = and <4 x i1> %2, %extract.i
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
define zeroext i32 @test_masked_vpcmpsgtq_v4i1_v32i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgtq_v4i1_v32i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpgtq (%rsi), %ymm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi841:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi842:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi843:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpcmpgtq (%rsi), %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
; NoVLX-NEXT: kshiftrw $15, %k3, %k3
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k3, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
%load = load <4 x i64>, <4 x i64>* %__b
%1 = bitcast <4 x i64> %load to <4 x i64>
%2 = icmp sgt <4 x i64> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%4 = and <4 x i1> %2, %extract.i
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
define zeroext i32 @test_vpcmpsgtq_v4i1_v32i1_mask_mem_b(<4 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgtq_v4i1_v32i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpgtq (%rdi){1to4}, %ymm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgtq_v4i1_v32i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi844:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi845:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi846:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1
; NoVLX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
%load = load i64, i64* %__b
%vec = insertelement <4 x i64> undef, i64 %load, i32 0
%1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
%2 = icmp sgt <4 x i64> %0, %1
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
define zeroext i32 @test_masked_vpcmpsgtq_v4i1_v32i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgtq_v4i1_v32i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpgtq (%rsi){1to4}, %ymm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v32i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi847:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi848:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi849:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1
; NoVLX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
; NoVLX-NEXT: kshiftrw $15, %k3, %k3
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k3, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
%load = load i64, i64* %__b
%vec = insertelement <4 x i64> undef, i64 %load, i32 0
%1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
%2 = icmp sgt <4 x i64> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%4 = and <4 x i1> %extract.i, %2
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
define zeroext i64 @test_vpcmpsgtq_v4i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgtq_v4i1_v64i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpgtq %ymm1, %ymm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgtq_v4i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi850:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi851:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi852:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
%1 = bitcast <4 x i64> %__b to <4 x i64>
%2 = icmp sgt <4 x i64> %0, %1
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
define zeroext i64 @test_vpcmpsgtq_v4i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgtq_v4i1_v64i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpgtq (%rdi), %ymm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgtq_v4i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi853:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi854:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi855:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpgtq (%rdi), %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
%load = load <4 x i64>, <4 x i64>* %__b
%1 = bitcast <4 x i64> %load to <4 x i64>
%2 = icmp sgt <4 x i64> %0, %1
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
define zeroext i64 @test_masked_vpcmpsgtq_v4i1_v64i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgtq_v4i1_v64i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpgtq %ymm1, %ymm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi856:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi857:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi858:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
; NoVLX-NEXT: kshiftrw $15, %k3, %k3
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k3, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
%1 = bitcast <4 x i64> %__b to <4 x i64>
%2 = icmp sgt <4 x i64> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%4 = and <4 x i1> %2, %extract.i
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
define zeroext i64 @test_masked_vpcmpsgtq_v4i1_v64i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgtq_v4i1_v64i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpgtq (%rsi), %ymm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi859:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi860:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi861:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpgtq (%rsi), %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
; NoVLX-NEXT: kshiftrw $15, %k3, %k3
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k3, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
%load = load <4 x i64>, <4 x i64>* %__b
%1 = bitcast <4 x i64> %load to <4 x i64>
%2 = icmp sgt <4 x i64> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%4 = and <4 x i1> %2, %extract.i
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
define zeroext i64 @test_vpcmpsgtq_v4i1_v64i1_mask_mem_b(<4 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgtq_v4i1_v64i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpgtq (%rdi){1to4}, %ymm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgtq_v4i1_v64i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi862:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi863:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi864:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1
; NoVLX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
%load = load i64, i64* %__b
%vec = insertelement <4 x i64> undef, i64 %load, i32 0
%1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
%2 = icmp sgt <4 x i64> %0, %1
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
define zeroext i64 @test_masked_vpcmpsgtq_v4i1_v64i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgtq_v4i1_v64i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpgtq (%rsi){1to4}, %ymm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v64i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi865:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi866:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi867:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1
; NoVLX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
; NoVLX-NEXT: kshiftrw $15, %k3, %k3
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k3, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
%load = load i64, i64* %__b
%vec = insertelement <4 x i64> undef, i64 %load, i32 0
%1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
%2 = icmp sgt <4 x i64> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%4 = and <4 x i1> %extract.i, %2
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
define zeroext i16 @test_vpcmpsgtq_v8i1_v16i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgtq_v8i1_v16i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgtq_v8i1_v16i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
%1 = bitcast <8 x i64> %__b to <8 x i64>
%2 = icmp sgt <8 x i64> %0, %1
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%4 = bitcast <16 x i1> %3 to i16
ret i16 %4
}
define zeroext i16 @test_vpcmpsgtq_v8i1_v16i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgtq_v8i1_v16i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpgtq (%rdi), %zmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgtq_v8i1_v16i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpcmpgtq (%rdi), %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
%load = load <8 x i64>, <8 x i64>* %__b
%1 = bitcast <8 x i64> %load to <8 x i64>
%2 = icmp sgt <8 x i64> %0, %1
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%4 = bitcast <16 x i1> %3 to i16
ret i16 %4
}
define zeroext i16 @test_masked_vpcmpsgtq_v8i1_v16i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgtq_v8i1_v16i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgtq_v8i1_v16i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
%1 = bitcast <8 x i64> %__b to <8 x i64>
%2 = icmp sgt <8 x i64> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %2, %3
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%6 = bitcast <16 x i1> %5 to i16
ret i16 %6
}
define zeroext i16 @test_masked_vpcmpsgtq_v8i1_v16i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgtq_v8i1_v16i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpgtq (%rsi), %zmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgtq_v8i1_v16i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpgtq (%rsi), %zmm0, %k0 {%k1}
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
%load = load <8 x i64>, <8 x i64>* %__b
%1 = bitcast <8 x i64> %load to <8 x i64>
%2 = icmp sgt <8 x i64> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %2, %3
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%6 = bitcast <16 x i1> %5 to i16
ret i16 %6
}
define zeroext i16 @test_vpcmpsgtq_v8i1_v16i1_mask_mem_b(<8 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgtq_v8i1_v16i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpgtq (%rdi){1to8}, %zmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgtq_v8i1_v16i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpcmpgtq (%rdi){1to8}, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
%load = load i64, i64* %__b
%vec = insertelement <8 x i64> undef, i64 %load, i32 0
%1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
%2 = icmp sgt <8 x i64> %0, %1
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%4 = bitcast <16 x i1> %3 to i16
ret i16 %4
}
define zeroext i16 @test_masked_vpcmpsgtq_v8i1_v16i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgtq_v8i1_v16i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpgtq (%rsi){1to8}, %zmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgtq_v8i1_v16i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpgtq (%rsi){1to8}, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
%load = load i64, i64* %__b
%vec = insertelement <8 x i64> undef, i64 %load, i32 0
%1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
%2 = icmp sgt <8 x i64> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %3, %2
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%6 = bitcast <16 x i1> %5 to i16
ret i16 %6
}
define zeroext i32 @test_vpcmpsgtq_v8i1_v32i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgtq_v8i1_v32i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgtq_v8i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi868:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi869:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi870:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
%1 = bitcast <8 x i64> %__b to <8 x i64>
%2 = icmp sgt <8 x i64> %0, %1
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
define zeroext i32 @test_vpcmpsgtq_v8i1_v32i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgtq_v8i1_v32i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpgtq (%rdi), %zmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgtq_v8i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi871:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi872:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi873:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpcmpgtq (%rdi), %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
%load = load <8 x i64>, <8 x i64>* %__b
%1 = bitcast <8 x i64> %load to <8 x i64>
%2 = icmp sgt <8 x i64> %0, %1
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
define zeroext i32 @test_masked_vpcmpsgtq_v8i1_v32i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgtq_v8i1_v32i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgtq_v8i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi874:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi875:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi876:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
%1 = bitcast <8 x i64> %__b to <8 x i64>
%2 = icmp sgt <8 x i64> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %2, %3
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
define zeroext i32 @test_masked_vpcmpsgtq_v8i1_v32i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgtq_v8i1_v32i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpgtq (%rsi), %zmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgtq_v8i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi877:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi878:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi879:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpgtq (%rsi), %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
%load = load <8 x i64>, <8 x i64>* %__b
%1 = bitcast <8 x i64> %load to <8 x i64>
%2 = icmp sgt <8 x i64> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %2, %3
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
define zeroext i32 @test_vpcmpsgtq_v8i1_v32i1_mask_mem_b(<8 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgtq_v8i1_v32i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpgtq (%rdi){1to8}, %zmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgtq_v8i1_v32i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi880:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi881:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi882:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpcmpgtq (%rdi){1to8}, %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
%load = load i64, i64* %__b
%vec = insertelement <8 x i64> undef, i64 %load, i32 0
%1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
%2 = icmp sgt <8 x i64> %0, %1
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
define zeroext i32 @test_masked_vpcmpsgtq_v8i1_v32i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgtq_v8i1_v32i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpgtq (%rsi){1to8}, %zmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgtq_v8i1_v32i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi883:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi884:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi885:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpgtq (%rsi){1to8}, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
%load = load i64, i64* %__b
%vec = insertelement <8 x i64> undef, i64 %load, i32 0
%1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
%2 = icmp sgt <8 x i64> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %3, %2
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
define zeroext i64 @test_vpcmpsgtq_v8i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgtq_v8i1_v64i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgtq_v8i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi886:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi887:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi888:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
%1 = bitcast <8 x i64> %__b to <8 x i64>
%2 = icmp sgt <8 x i64> %0, %1
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
define zeroext i64 @test_vpcmpsgtq_v8i1_v64i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgtq_v8i1_v64i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpgtq (%rdi), %zmm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgtq_v8i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi889:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi890:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi891:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpgtq (%rdi), %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
%load = load <8 x i64>, <8 x i64>* %__b
%1 = bitcast <8 x i64> %load to <8 x i64>
%2 = icmp sgt <8 x i64> %0, %1
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
define zeroext i64 @test_masked_vpcmpsgtq_v8i1_v64i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgtq_v8i1_v64i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgtq_v8i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi892:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi893:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi894:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
%1 = bitcast <8 x i64> %__b to <8 x i64>
%2 = icmp sgt <8 x i64> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %2, %3
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
define zeroext i64 @test_masked_vpcmpsgtq_v8i1_v64i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgtq_v8i1_v64i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpgtq (%rsi), %zmm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgtq_v8i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi895:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi896:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi897:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpgtq (%rsi), %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
%load = load <8 x i64>, <8 x i64>* %__b
%1 = bitcast <8 x i64> %load to <8 x i64>
%2 = icmp sgt <8 x i64> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %2, %3
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
define zeroext i64 @test_vpcmpsgtq_v8i1_v64i1_mask_mem_b(<8 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgtq_v8i1_v64i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpgtq (%rdi){1to8}, %zmm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgtq_v8i1_v64i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi898:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi899:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi900:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpgtq (%rdi){1to8}, %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
%load = load i64, i64* %__b
%vec = insertelement <8 x i64> undef, i64 %load, i32 0
%1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
%2 = icmp sgt <8 x i64> %0, %1
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
define zeroext i64 @test_masked_vpcmpsgtq_v8i1_v64i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgtq_v8i1_v64i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpgtq (%rsi){1to8}, %zmm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgtq_v8i1_v64i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi901:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi902:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi903:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpgtq (%rsi){1to8}, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
%load = load i64, i64* %__b
%vec = insertelement <8 x i64> undef, i64 %load, i32 0
%1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
%2 = icmp sgt <8 x i64> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %3, %2
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
define zeroext i32 @test_vpcmpsgeb_v16i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgeb_v16i1_v32i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpleb %xmm0, %xmm1, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgeb_v16i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi904:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi905:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi906:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %r13
; NoVLX-NEXT: pushq %r12
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: .Lcfi907:
; NoVLX-NEXT: .cfi_offset %rbx, -56
; NoVLX-NEXT: .Lcfi908:
; NoVLX-NEXT: .cfi_offset %r12, -48
; NoVLX-NEXT: .Lcfi909:
; NoVLX-NEXT: .cfi_offset %r13, -40
; NoVLX-NEXT: .Lcfi910:
; NoVLX-NEXT: .cfi_offset %r14, -32
; NoVLX-NEXT: .Lcfi911:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: leaq -40(%rbp), %rsp
; NoVLX-NEXT: popq %rbx
; NoVLX-NEXT: popq %r12
; NoVLX-NEXT: popq %r13
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <16 x i8>
%1 = bitcast <2 x i64> %__b to <16 x i8>
%2 = icmp sge <16 x i8> %0, %1
%3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
define zeroext i32 @test_vpcmpsgeb_v16i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgeb_v16i1_v32i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpnltb (%rdi), %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgeb_v16i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi912:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi913:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi914:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %r13
; NoVLX-NEXT: pushq %r12
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: .Lcfi915:
; NoVLX-NEXT: .cfi_offset %rbx, -56
; NoVLX-NEXT: .Lcfi916:
; NoVLX-NEXT: .cfi_offset %r12, -48
; NoVLX-NEXT: .Lcfi917:
; NoVLX-NEXT: .cfi_offset %r13, -40
; NoVLX-NEXT: .Lcfi918:
; NoVLX-NEXT: .cfi_offset %r14, -32
; NoVLX-NEXT: .Lcfi919:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: leaq -40(%rbp), %rsp
; NoVLX-NEXT: popq %rbx
; NoVLX-NEXT: popq %r12
; NoVLX-NEXT: popq %r13
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <16 x i8>
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <16 x i8>
%2 = icmp sge <16 x i8> %0, %1
%3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
define zeroext i32 @test_masked_vpcmpsgeb_v16i1_v32i1_mask(i16 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgeb_v16i1_v32i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpleb %xmm0, %xmm1, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgeb_v16i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi920:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi921:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi922:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %r13
; NoVLX-NEXT: pushq %r12
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: .Lcfi923:
; NoVLX-NEXT: .cfi_offset %rbx, -56
; NoVLX-NEXT: .Lcfi924:
; NoVLX-NEXT: .cfi_offset %r12, -48
; NoVLX-NEXT: .Lcfi925:
; NoVLX-NEXT: .cfi_offset %r13, -40
; NoVLX-NEXT: .Lcfi926:
; NoVLX-NEXT: .cfi_offset %r14, -32
; NoVLX-NEXT: .Lcfi927:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: leaq -40(%rbp), %rsp
; NoVLX-NEXT: popq %rbx
; NoVLX-NEXT: popq %r12
; NoVLX-NEXT: popq %r13
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <16 x i8>
%1 = bitcast <2 x i64> %__b to <16 x i8>
%2 = icmp sge <16 x i8> %0, %1
%3 = bitcast i16 %__u to <16 x i1>
%4 = and <16 x i1> %2, %3
%5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
define zeroext i32 @test_masked_vpcmpsgeb_v16i1_v32i1_mask_mem(i16 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgeb_v16i1_v32i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpnltb (%rsi), %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgeb_v16i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi928:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi929:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi930:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %r13
; NoVLX-NEXT: pushq %r12
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: .Lcfi931:
; NoVLX-NEXT: .cfi_offset %rbx, -56
; NoVLX-NEXT: .Lcfi932:
; NoVLX-NEXT: .cfi_offset %r12, -48
; NoVLX-NEXT: .Lcfi933:
; NoVLX-NEXT: .cfi_offset %r13, -40
; NoVLX-NEXT: .Lcfi934:
; NoVLX-NEXT: .cfi_offset %r14, -32
; NoVLX-NEXT: .Lcfi935:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: leaq -40(%rbp), %rsp
; NoVLX-NEXT: popq %rbx
; NoVLX-NEXT: popq %r12
; NoVLX-NEXT: popq %r13
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <16 x i8>
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <16 x i8>
%2 = icmp sge <16 x i8> %0, %1
%3 = bitcast i16 %__u to <16 x i1>
%4 = and <16 x i1> %2, %3
%5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
define zeroext i64 @test_vpcmpsgeb_v16i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgeb_v16i1_v64i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpleb %xmm0, %xmm1, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgeb_v16i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi936:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi937:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi938:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %r13
; NoVLX-NEXT: pushq %r12
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: .Lcfi939:
; NoVLX-NEXT: .cfi_offset %rbx, -56
; NoVLX-NEXT: .Lcfi940:
; NoVLX-NEXT: .cfi_offset %r12, -48
; NoVLX-NEXT: .Lcfi941:
; NoVLX-NEXT: .cfi_offset %r13, -40
; NoVLX-NEXT: .Lcfi942:
; NoVLX-NEXT: .cfi_offset %r14, -32
; NoVLX-NEXT: .Lcfi943:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: leaq -40(%rbp), %rsp
; NoVLX-NEXT: popq %rbx
; NoVLX-NEXT: popq %r12
; NoVLX-NEXT: popq %r13
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <16 x i8>
%1 = bitcast <2 x i64> %__b to <16 x i8>
%2 = icmp sge <16 x i8> %0, %1
%3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
define zeroext i64 @test_vpcmpsgeb_v16i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgeb_v16i1_v64i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpnltb (%rdi), %xmm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgeb_v16i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi944:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi945:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi946:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %r13
; NoVLX-NEXT: pushq %r12
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: .Lcfi947:
; NoVLX-NEXT: .cfi_offset %rbx, -56
; NoVLX-NEXT: .Lcfi948:
; NoVLX-NEXT: .cfi_offset %r12, -48
; NoVLX-NEXT: .Lcfi949:
; NoVLX-NEXT: .cfi_offset %r13, -40
; NoVLX-NEXT: .Lcfi950:
; NoVLX-NEXT: .cfi_offset %r14, -32
; NoVLX-NEXT: .Lcfi951:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: leaq -40(%rbp), %rsp
; NoVLX-NEXT: popq %rbx
; NoVLX-NEXT: popq %r12
; NoVLX-NEXT: popq %r13
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <16 x i8>
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <16 x i8>
%2 = icmp sge <16 x i8> %0, %1
%3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
define zeroext i64 @test_masked_vpcmpsgeb_v16i1_v64i1_mask(i16 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgeb_v16i1_v64i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpleb %xmm0, %xmm1, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgeb_v16i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi952:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi953:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi954:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %r13
; NoVLX-NEXT: pushq %r12
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: .Lcfi955:
; NoVLX-NEXT: .cfi_offset %rbx, -56
; NoVLX-NEXT: .Lcfi956:
; NoVLX-NEXT: .cfi_offset %r12, -48
; NoVLX-NEXT: .Lcfi957:
; NoVLX-NEXT: .cfi_offset %r13, -40
; NoVLX-NEXT: .Lcfi958:
; NoVLX-NEXT: .cfi_offset %r14, -32
; NoVLX-NEXT: .Lcfi959:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: leaq -40(%rbp), %rsp
; NoVLX-NEXT: popq %rbx
; NoVLX-NEXT: popq %r12
; NoVLX-NEXT: popq %r13
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <16 x i8>
%1 = bitcast <2 x i64> %__b to <16 x i8>
%2 = icmp sge <16 x i8> %0, %1
%3 = bitcast i16 %__u to <16 x i1>
%4 = and <16 x i1> %2, %3
%5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
define zeroext i64 @test_masked_vpcmpsgeb_v16i1_v64i1_mask_mem(i16 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgeb_v16i1_v64i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpnltb (%rsi), %xmm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgeb_v16i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi960:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi961:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi962:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %r13
; NoVLX-NEXT: pushq %r12
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: .Lcfi963:
; NoVLX-NEXT: .cfi_offset %rbx, -56
; NoVLX-NEXT: .Lcfi964:
; NoVLX-NEXT: .cfi_offset %r12, -48
; NoVLX-NEXT: .Lcfi965:
; NoVLX-NEXT: .cfi_offset %r13, -40
; NoVLX-NEXT: .Lcfi966:
; NoVLX-NEXT: .cfi_offset %r14, -32
; NoVLX-NEXT: .Lcfi967:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: leaq -40(%rbp), %rsp
; NoVLX-NEXT: popq %rbx
; NoVLX-NEXT: popq %r12
; NoVLX-NEXT: popq %r13
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <16 x i8>
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <16 x i8>
%2 = icmp sge <16 x i8> %0, %1
%3 = bitcast i16 %__u to <16 x i1>
%4 = and <16 x i1> %2, %3
%5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
define zeroext i64 @test_vpcmpsgeb_v32i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgeb_v32i1_v64i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpleb %ymm0, %ymm1, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgeb_v32i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi968:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi969:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi970:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpgtb %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %ecx
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax
; NoVLX-NEXT: shlq $32, %rax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <32 x i8>
%1 = bitcast <4 x i64> %__b to <32 x i8>
%2 = icmp sge <32 x i8> %0, %1
%3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
define zeroext i64 @test_vpcmpsgeb_v32i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgeb_v32i1_v64i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpnltb (%rdi), %ymm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgeb_v32i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi971:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi972:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi973:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
; NoVLX-NEXT: vpcmpgtb %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %ecx
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax
; NoVLX-NEXT: shlq $32, %rax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <32 x i8>
%load = load <4 x i64>, <4 x i64>* %__b
%1 = bitcast <4 x i64> %load to <32 x i8>
%2 = icmp sge <32 x i8> %0, %1
%3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
define zeroext i64 @test_masked_vpcmpsgeb_v32i1_v64i1_mask(i32 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgeb_v32i1_v64i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpleb %ymm0, %ymm1, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgeb_v32i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi974:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi975:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi976:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $96, %rsp
; NoVLX-NEXT: movl %edi, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k1
; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k2
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vpmovdb %zmm2, %xmm2
; NoVLX-NEXT: vpternlogd $255, %zmm3, %zmm3, %zmm3 {%k2} {z}
; NoVLX-NEXT: vpmovdb %zmm3, %xmm3
; NoVLX-NEXT: vpcmpgtb %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpand %xmm3, %xmm1, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpand %xmm2, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %ecx
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax
; NoVLX-NEXT: shlq $32, %rax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <32 x i8>
%1 = bitcast <4 x i64> %__b to <32 x i8>
%2 = icmp sge <32 x i8> %0, %1
%3 = bitcast i32 %__u to <32 x i1>
%4 = and <32 x i1> %2, %3
%5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
define zeroext i64 @test_masked_vpcmpsgeb_v32i1_v64i1_mask_mem(i32 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgeb_v32i1_v64i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpnltb (%rsi), %ymm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgeb_v32i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi977:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi978:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi979:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $96, %rsp
; NoVLX-NEXT: movl %edi, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k1
; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k2
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpmovdb %zmm1, %xmm1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k2} {z}
; NoVLX-NEXT: vpmovdb %zmm2, %xmm2
; NoVLX-NEXT: vmovdqa (%rsi), %ymm3
; NoVLX-NEXT: vpcmpgtb %ymm0, %ymm3, %ymm0
; NoVLX-NEXT: vpcmpeqd %ymm3, %ymm3, %ymm3
; NoVLX-NEXT: vpxor %ymm3, %ymm0, %ymm0
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm3
; NoVLX-NEXT: vpand %xmm2, %xmm3, %xmm2
; NoVLX-NEXT: vpmovsxbd %xmm2, %zmm2
; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2
; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %ecx
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax
; NoVLX-NEXT: shlq $32, %rax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <32 x i8>
%load = load <4 x i64>, <4 x i64>* %__b
%1 = bitcast <4 x i64> %load to <32 x i8>
%2 = icmp sge <32 x i8> %0, %1
%3 = bitcast i32 %__u to <32 x i1>
%4 = and <32 x i1> %2, %3
%5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
define zeroext i16 @test_vpcmpsgew_v8i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgew_v8i1_v16i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmplew %xmm0, %xmm1, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgew_v8i1_v16i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kunpckbw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <8 x i16>
%1 = bitcast <2 x i64> %__b to <8 x i16>
%2 = icmp sge <8 x i16> %0, %1
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%4 = bitcast <16 x i1> %3 to i16
ret i16 %4
}
define zeroext i16 @test_vpcmpsgew_v8i1_v16i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgew_v8i1_v16i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpnltw (%rdi), %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgew_v8i1_v16i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kunpckbw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <8 x i16>
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <8 x i16>
%2 = icmp sge <8 x i16> %0, %1
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%4 = bitcast <16 x i1> %3 to i16
ret i16 %4
}
define zeroext i16 @test_masked_vpcmpsgew_v8i1_v16i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgew_v8i1_v16i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmplew %xmm0, %xmm1, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgew_v8i1_v16i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kunpckbw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <8 x i16>
%1 = bitcast <2 x i64> %__b to <8 x i16>
%2 = icmp sge <8 x i16> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %2, %3
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%6 = bitcast <16 x i1> %5 to i16
ret i16 %6
}
define zeroext i16 @test_masked_vpcmpsgew_v8i1_v16i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgew_v8i1_v16i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpnltw (%rsi), %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgew_v8i1_v16i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kunpckbw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <8 x i16>
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <8 x i16>
%2 = icmp sge <8 x i16> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %2, %3
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%6 = bitcast <16 x i1> %5 to i16
ret i16 %6
}
define zeroext i32 @test_vpcmpsgew_v8i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgew_v8i1_v32i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmplew %xmm0, %xmm1, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgew_v8i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi980:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi981:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi982:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <8 x i16>
%1 = bitcast <2 x i64> %__b to <8 x i16>
%2 = icmp sge <8 x i16> %0, %1
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
define zeroext i32 @test_vpcmpsgew_v8i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgew_v8i1_v32i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpnltw (%rdi), %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgew_v8i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi983:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi984:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi985:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <8 x i16>
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <8 x i16>
%2 = icmp sge <8 x i16> %0, %1
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
define zeroext i32 @test_masked_vpcmpsgew_v8i1_v32i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgew_v8i1_v32i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmplew %xmm0, %xmm1, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgew_v8i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi986:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi987:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi988:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <8 x i16>
%1 = bitcast <2 x i64> %__b to <8 x i16>
%2 = icmp sge <8 x i16> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %2, %3
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
define zeroext i32 @test_masked_vpcmpsgew_v8i1_v32i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgew_v8i1_v32i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpnltw (%rsi), %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgew_v8i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi989:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi990:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi991:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <8 x i16>
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <8 x i16>
%2 = icmp sge <8 x i16> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %2, %3
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
define zeroext i64 @test_vpcmpsgew_v8i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgew_v8i1_v64i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmplew %xmm0, %xmm1, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgew_v8i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi992:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi993:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi994:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <8 x i16>
%1 = bitcast <2 x i64> %__b to <8 x i16>
%2 = icmp sge <8 x i16> %0, %1
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
define zeroext i64 @test_vpcmpsgew_v8i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgew_v8i1_v64i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpnltw (%rdi), %xmm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgew_v8i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi995:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi996:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi997:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <8 x i16>
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <8 x i16>
%2 = icmp sge <8 x i16> %0, %1
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
define zeroext i64 @test_masked_vpcmpsgew_v8i1_v64i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgew_v8i1_v64i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmplew %xmm0, %xmm1, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgew_v8i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi998:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi999:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1000:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <8 x i16>
%1 = bitcast <2 x i64> %__b to <8 x i16>
%2 = icmp sge <8 x i16> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %2, %3
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
define zeroext i64 @test_masked_vpcmpsgew_v8i1_v64i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgew_v8i1_v64i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpnltw (%rsi), %xmm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgew_v8i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1001:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1002:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1003:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <8 x i16>
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <8 x i16>
%2 = icmp sge <8 x i16> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %2, %3
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
define zeroext i32 @test_vpcmpsgew_v16i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgew_v16i1_v32i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmplew %ymm0, %ymm1, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgew_v16i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1004:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1005:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1006:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %r13
; NoVLX-NEXT: pushq %r12
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: .Lcfi1007:
; NoVLX-NEXT: .cfi_offset %rbx, -56
; NoVLX-NEXT: .Lcfi1008:
; NoVLX-NEXT: .cfi_offset %r12, -48
; NoVLX-NEXT: .Lcfi1009:
; NoVLX-NEXT: .cfi_offset %r13, -40
; NoVLX-NEXT: .Lcfi1010:
; NoVLX-NEXT: .cfi_offset %r14, -32
; NoVLX-NEXT: .Lcfi1011:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: leaq -40(%rbp), %rsp
; NoVLX-NEXT: popq %rbx
; NoVLX-NEXT: popq %r12
; NoVLX-NEXT: popq %r13
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <16 x i16>
%1 = bitcast <4 x i64> %__b to <16 x i16>
%2 = icmp sge <16 x i16> %0, %1
%3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
define zeroext i32 @test_vpcmpsgew_v16i1_v32i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgew_v16i1_v32i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpnltw (%rdi), %ymm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgew_v16i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1012:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1013:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1014:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %r13
; NoVLX-NEXT: pushq %r12
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: .Lcfi1015:
; NoVLX-NEXT: .cfi_offset %rbx, -56
; NoVLX-NEXT: .Lcfi1016:
; NoVLX-NEXT: .cfi_offset %r12, -48
; NoVLX-NEXT: .Lcfi1017:
; NoVLX-NEXT: .cfi_offset %r13, -40
; NoVLX-NEXT: .Lcfi1018:
; NoVLX-NEXT: .cfi_offset %r14, -32
; NoVLX-NEXT: .Lcfi1019:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: leaq -40(%rbp), %rsp
; NoVLX-NEXT: popq %rbx
; NoVLX-NEXT: popq %r12
; NoVLX-NEXT: popq %r13
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <16 x i16>
%load = load <4 x i64>, <4 x i64>* %__b
%1 = bitcast <4 x i64> %load to <16 x i16>
%2 = icmp sge <16 x i16> %0, %1
%3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
define zeroext i32 @test_masked_vpcmpsgew_v16i1_v32i1_mask(i16 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgew_v16i1_v32i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmplew %ymm0, %ymm1, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgew_v16i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1020:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1021:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1022:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %r13
; NoVLX-NEXT: pushq %r12
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: .Lcfi1023:
; NoVLX-NEXT: .cfi_offset %rbx, -56
; NoVLX-NEXT: .Lcfi1024:
; NoVLX-NEXT: .cfi_offset %r12, -48
; NoVLX-NEXT: .Lcfi1025:
; NoVLX-NEXT: .cfi_offset %r13, -40
; NoVLX-NEXT: .Lcfi1026:
; NoVLX-NEXT: .cfi_offset %r14, -32
; NoVLX-NEXT: .Lcfi1027:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: leaq -40(%rbp), %rsp
; NoVLX-NEXT: popq %rbx
; NoVLX-NEXT: popq %r12
; NoVLX-NEXT: popq %r13
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <16 x i16>
%1 = bitcast <4 x i64> %__b to <16 x i16>
%2 = icmp sge <16 x i16> %0, %1
%3 = bitcast i16 %__u to <16 x i1>
%4 = and <16 x i1> %2, %3
%5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
define zeroext i32 @test_masked_vpcmpsgew_v16i1_v32i1_mask_mem(i16 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgew_v16i1_v32i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpnltw (%rsi), %ymm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgew_v16i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1028:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1029:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1030:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %r13
; NoVLX-NEXT: pushq %r12
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: .Lcfi1031:
; NoVLX-NEXT: .cfi_offset %rbx, -56
; NoVLX-NEXT: .Lcfi1032:
; NoVLX-NEXT: .cfi_offset %r12, -48
; NoVLX-NEXT: .Lcfi1033:
; NoVLX-NEXT: .cfi_offset %r13, -40
; NoVLX-NEXT: .Lcfi1034:
; NoVLX-NEXT: .cfi_offset %r14, -32
; NoVLX-NEXT: .Lcfi1035:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: leaq -40(%rbp), %rsp
; NoVLX-NEXT: popq %rbx
; NoVLX-NEXT: popq %r12
; NoVLX-NEXT: popq %r13
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <16 x i16>
%load = load <4 x i64>, <4 x i64>* %__b
%1 = bitcast <4 x i64> %load to <16 x i16>
%2 = icmp sge <16 x i16> %0, %1
%3 = bitcast i16 %__u to <16 x i1>
%4 = and <16 x i1> %2, %3
%5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
define zeroext i64 @test_vpcmpsgew_v16i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgew_v16i1_v64i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmplew %ymm0, %ymm1, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgew_v16i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1036:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1037:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1038:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %r13
; NoVLX-NEXT: pushq %r12
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: .Lcfi1039:
; NoVLX-NEXT: .cfi_offset %rbx, -56
; NoVLX-NEXT: .Lcfi1040:
; NoVLX-NEXT: .cfi_offset %r12, -48
; NoVLX-NEXT: .Lcfi1041:
; NoVLX-NEXT: .cfi_offset %r13, -40
; NoVLX-NEXT: .Lcfi1042:
; NoVLX-NEXT: .cfi_offset %r14, -32
; NoVLX-NEXT: .Lcfi1043:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: leaq -40(%rbp), %rsp
; NoVLX-NEXT: popq %rbx
; NoVLX-NEXT: popq %r12
; NoVLX-NEXT: popq %r13
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <16 x i16>
%1 = bitcast <4 x i64> %__b to <16 x i16>
%2 = icmp sge <16 x i16> %0, %1
%3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
define zeroext i64 @test_vpcmpsgew_v16i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgew_v16i1_v64i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpnltw (%rdi), %ymm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgew_v16i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1044:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1045:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1046:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %r13
; NoVLX-NEXT: pushq %r12
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: .Lcfi1047:
; NoVLX-NEXT: .cfi_offset %rbx, -56
; NoVLX-NEXT: .Lcfi1048:
; NoVLX-NEXT: .cfi_offset %r12, -48
; NoVLX-NEXT: .Lcfi1049:
; NoVLX-NEXT: .cfi_offset %r13, -40
; NoVLX-NEXT: .Lcfi1050:
; NoVLX-NEXT: .cfi_offset %r14, -32
; NoVLX-NEXT: .Lcfi1051:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: leaq -40(%rbp), %rsp
; NoVLX-NEXT: popq %rbx
; NoVLX-NEXT: popq %r12
; NoVLX-NEXT: popq %r13
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <16 x i16>
%load = load <4 x i64>, <4 x i64>* %__b
%1 = bitcast <4 x i64> %load to <16 x i16>
%2 = icmp sge <16 x i16> %0, %1
%3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
define zeroext i64 @test_masked_vpcmpsgew_v16i1_v64i1_mask(i16 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgew_v16i1_v64i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmplew %ymm0, %ymm1, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgew_v16i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1052:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1053:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1054:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %r13
; NoVLX-NEXT: pushq %r12
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: .Lcfi1055:
; NoVLX-NEXT: .cfi_offset %rbx, -56
; NoVLX-NEXT: .Lcfi1056:
; NoVLX-NEXT: .cfi_offset %r12, -48
; NoVLX-NEXT: .Lcfi1057:
; NoVLX-NEXT: .cfi_offset %r13, -40
; NoVLX-NEXT: .Lcfi1058:
; NoVLX-NEXT: .cfi_offset %r14, -32
; NoVLX-NEXT: .Lcfi1059:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: leaq -40(%rbp), %rsp
; NoVLX-NEXT: popq %rbx
; NoVLX-NEXT: popq %r12
; NoVLX-NEXT: popq %r13
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <16 x i16>
%1 = bitcast <4 x i64> %__b to <16 x i16>
%2 = icmp sge <16 x i16> %0, %1
%3 = bitcast i16 %__u to <16 x i1>
%4 = and <16 x i1> %2, %3
%5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
define zeroext i64 @test_masked_vpcmpsgew_v16i1_v64i1_mask_mem(i16 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgew_v16i1_v64i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpnltw (%rsi), %ymm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgew_v16i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1060:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1061:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1062:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %r13
; NoVLX-NEXT: pushq %r12
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: .Lcfi1063:
; NoVLX-NEXT: .cfi_offset %rbx, -56
; NoVLX-NEXT: .Lcfi1064:
; NoVLX-NEXT: .cfi_offset %r12, -48
; NoVLX-NEXT: .Lcfi1065:
; NoVLX-NEXT: .cfi_offset %r13, -40
; NoVLX-NEXT: .Lcfi1066:
; NoVLX-NEXT: .cfi_offset %r14, -32
; NoVLX-NEXT: .Lcfi1067:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: leaq -40(%rbp), %rsp
; NoVLX-NEXT: popq %rbx
; NoVLX-NEXT: popq %r12
; NoVLX-NEXT: popq %r13
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <16 x i16>
%load = load <4 x i64>, <4 x i64>* %__b
%1 = bitcast <4 x i64> %load to <16 x i16>
%2 = icmp sge <16 x i16> %0, %1
%3 = bitcast i16 %__u to <16 x i1>
%4 = and <16 x i1> %2, %3
%5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
define zeroext i64 @test_vpcmpsgew_v32i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgew_v32i1_v64i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmplew %zmm0, %zmm1, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgew_v32i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1068:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1069:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1070:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm3
; NoVLX-NEXT: vmovq %xmm3, %rax
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: movq %rax, %rdx
; NoVLX-NEXT: vmovd %eax, %xmm2
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm5
; NoVLX-NEXT: vextracti32x4 $2, %zmm1, %xmm8
; NoVLX-NEXT: vextracti32x4 $3, %zmm1, %xmm4
; NoVLX-NEXT: vextracti128 $1, %ymm1, %xmm6
; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm7
; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm2
; NoVLX-NEXT: shrq $32, %rdx
; NoVLX-NEXT: vpinsrw $2, %edx, %xmm5, %xmm5
; NoVLX-NEXT: vpextrq $1, %xmm3, %rax
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm3
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: vmovq %xmm0, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm3
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vmovd %ecx, %xmm5
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm5, %xmm5
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm5, %xmm5
; NoVLX-NEXT: vpextrq $1, %xmm0, %rax
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm0
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: vmovq %xmm2, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vmovd %ecx, %xmm5
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm5, %xmm5
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm5, %xmm5
; NoVLX-NEXT: vpextrq $1, %xmm2, %rax
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm2
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: vmovq %xmm7, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm5
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vmovd %ecx, %xmm2
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
; NoVLX-NEXT: vpextrq $1, %xmm7, %rax
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: vmovq %xmm6, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm7
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vmovd %ecx, %xmm2
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
; NoVLX-NEXT: vpextrq $1, %xmm6, %rax
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: vmovq %xmm1, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm6
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vmovd %ecx, %xmm2
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
; NoVLX-NEXT: vpextrq $1, %xmm1, %rax
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm1
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1
; NoVLX-NEXT: vmovq %xmm4, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vmovd %ecx, %xmm2
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
; NoVLX-NEXT: vpextrq $1, %xmm4, %rax
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: vmovq %xmm8, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vmovd %ecx, %xmm4
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4
; NoVLX-NEXT: vpextrq $1, %xmm8, %rax
; NoVLX-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0
; NoVLX-NEXT: vinserti128 $1, %xmm5, %ymm7, %ymm3
; NoVLX-NEXT: vinserti128 $1, %xmm6, %ymm1, %ymm1
; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm1
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
; NoVLX-NEXT: vpcmpgtw %ymm3, %ymm1, %ymm2
; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
; NoVLX-NEXT: vpxor %ymm1, %ymm2, %ymm2
; NoVLX-NEXT: vpmovsxwd %ymm2, %zmm2
; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2
; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm2
; NoVLX-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm2, %xmm2
; NoVLX-NEXT: vpmovsxbd %xmm2, %zmm2
; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2
; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm0
; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %ecx
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax
; NoVLX-NEXT: shlq $32, %rax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <32 x i16>
%1 = bitcast <8 x i64> %__b to <32 x i16>
%2 = icmp sge <32 x i16> %0, %1
%3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
define zeroext i64 @test_vpcmpsgew_v32i1_v64i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgew_v32i1_v64i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpnltw (%rdi), %zmm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgew_v32i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1071:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1072:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1073:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm2
; NoVLX-NEXT: vmovq %xmm2, %rax
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: movq %rax, %rdx
; NoVLX-NEXT: vmovd %eax, %xmm1
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm1, %xmm3
; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm1
; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm4
; NoVLX-NEXT: shrq $32, %rdx
; NoVLX-NEXT: vpinsrw $2, %edx, %xmm3, %xmm3
; NoVLX-NEXT: vpextrq $1, %xmm2, %rax
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm2
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: vmovq %xmm0, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vmovd %ecx, %xmm3
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
; NoVLX-NEXT: vpextrq $1, %xmm0, %rax
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm0
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: vmovq %xmm4, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vmovd %ecx, %xmm3
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
; NoVLX-NEXT: vpextrq $1, %xmm4, %rax
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: vmovq %xmm1, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm3
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vmovd %ecx, %xmm4
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4
; NoVLX-NEXT: vpextrq $1, %xmm1, %rax
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm1
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1
; NoVLX-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vinserti128 $1, %xmm3, %ymm1, %ymm1
; NoVLX-NEXT: vmovdqa (%rdi), %ymm2
; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm2, %ymm0
; NoVLX-NEXT: vmovdqa 32(%rdi), %ymm2
; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm2, %ymm2
; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
; NoVLX-NEXT: vpxor %ymm1, %ymm2, %ymm2
; NoVLX-NEXT: vpmovsxwd %ymm2, %zmm2
; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2
; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm2
; NoVLX-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm2, %xmm2
; NoVLX-NEXT: vpmovsxbd %xmm2, %zmm2
; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2
; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm0
; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %ecx
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax
; NoVLX-NEXT: shlq $32, %rax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <32 x i16>
%load = load <8 x i64>, <8 x i64>* %__b
%1 = bitcast <8 x i64> %load to <32 x i16>
%2 = icmp sge <32 x i16> %0, %1
%3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
define zeroext i64 @test_masked_vpcmpsgew_v32i1_v64i1_mask(i32 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgew_v32i1_v64i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmplew %zmm0, %zmm1, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgew_v32i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1074:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1075:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1076:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $96, %rsp
; NoVLX-NEXT: movl %edi, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm2
; NoVLX-NEXT: vmovq %xmm2, %rax
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: movq %rax, %rdx
; NoVLX-NEXT: vmovd %eax, %xmm3
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm4
; NoVLX-NEXT: vextracti128 $1, %ymm1, %xmm8
; NoVLX-NEXT: vextracti32x4 $2, %zmm1, %xmm5
; NoVLX-NEXT: vextracti32x4 $3, %zmm1, %xmm7
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm6
; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm3
; NoVLX-NEXT: shrq $32, %rdx
; NoVLX-NEXT: vpinsrw $2, %edx, %xmm4, %xmm4
; NoVLX-NEXT: vpextrq $1, %xmm2, %rax
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm2
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: vmovq %xmm3, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm9
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vmovd %ecx, %xmm4
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4
; NoVLX-NEXT: vpextrq $1, %xmm3, %rax
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm3
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: vmovq %xmm6, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm4
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vmovd %ecx, %xmm3
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
; NoVLX-NEXT: vpextrq $1, %xmm6, %rax
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: vmovq %xmm0, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm6
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vmovd %ecx, %xmm3
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
; NoVLX-NEXT: vpextrq $1, %xmm0, %rax
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm0
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: vmovq %xmm7, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vmovd %ecx, %xmm3
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
; NoVLX-NEXT: vpextrq $1, %xmm7, %rax
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: vmovq %xmm5, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm7
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vmovd %ecx, %xmm3
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
; NoVLX-NEXT: vpextrq $1, %xmm5, %rax
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: vmovq %xmm8, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm3
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vmovd %ecx, %xmm5
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm5, %xmm5
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm5, %xmm5
; NoVLX-NEXT: vpextrq $1, %xmm8, %rax
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm5
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm5, %xmm5
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm5, %xmm5
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm5, %xmm5
; NoVLX-NEXT: vmovq %xmm1, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm5, %xmm5
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vmovd %ecx, %xmm2
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
; NoVLX-NEXT: vpextrq $1, %xmm1, %rax
; NoVLX-NEXT: vinserti128 $1, %xmm9, %ymm4, %ymm1
; NoVLX-NEXT: vinserti128 $1, %xmm6, %ymm0, %ymm4
; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k1
; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k2
; NoVLX-NEXT: vinserti128 $1, %xmm7, %ymm3, %ymm3
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm3, %ymm3
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k2} {z}
; NoVLX-NEXT: vpmovdb %zmm1, %xmm1
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2
; NoVLX-NEXT: vinserti128 $1, %xmm5, %ymm2, %ymm2
; NoVLX-NEXT: vpcmpgtw %ymm4, %ymm2, %ymm2
; NoVLX-NEXT: vpcmpeqd %ymm4, %ymm4, %ymm4
; NoVLX-NEXT: vpxor %ymm4, %ymm2, %ymm2
; NoVLX-NEXT: vpmovsxwd %ymm2, %zmm2
; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2
; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm2
; NoVLX-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm2, %xmm2
; NoVLX-NEXT: vpxor %ymm4, %ymm3, %ymm3
; NoVLX-NEXT: vpmovsxwd %ymm3, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm3
; NoVLX-NEXT: vptestmd %zmm3, %zmm3, %k0
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm3
; NoVLX-NEXT: vpinsrb $1, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $2, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $3, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $6, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $9, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $10, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $13, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $14, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm3, %xmm3
; NoVLX-NEXT: vpand %xmm1, %xmm3, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpand %xmm0, %xmm2, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %ecx
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax
; NoVLX-NEXT: shlq $32, %rax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <32 x i16>
%1 = bitcast <8 x i64> %__b to <32 x i16>
%2 = icmp sge <32 x i16> %0, %1
%3 = bitcast i32 %__u to <32 x i1>
%4 = and <32 x i1> %2, %3
%5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
define zeroext i64 @test_masked_vpcmpsgew_v32i1_v64i1_mask_mem(i32 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgew_v32i1_v64i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpnltw (%rsi), %zmm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgew_v32i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1077:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1078:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1079:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $96, %rsp
; NoVLX-NEXT: movl %edi, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm1
; NoVLX-NEXT: vmovq %xmm1, %rax
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: movq %rax, %rdx
; NoVLX-NEXT: vmovd %eax, %xmm2
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm3
; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm4
; NoVLX-NEXT: shrq $32, %rdx
; NoVLX-NEXT: vpinsrw $2, %edx, %xmm2, %xmm2
; NoVLX-NEXT: vpextrq $1, %xmm1, %rax
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm1
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1
; NoVLX-NEXT: vmovq %xmm4, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vmovd %ecx, %xmm2
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
; NoVLX-NEXT: vpextrq $1, %xmm4, %rax
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: vmovq %xmm3, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vmovd %ecx, %xmm4
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4
; NoVLX-NEXT: vpextrq $1, %xmm3, %rax
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm3
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: vmovq %xmm0, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm3
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vmovd %ecx, %xmm4
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4
; NoVLX-NEXT: vpextrq $1, %xmm0, %rax
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm0
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm4
; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k1
; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k2
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm2
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k2} {z}
; NoVLX-NEXT: vpmovdb %zmm1, %xmm1
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm4, %xmm4
; NoVLX-NEXT: vinserti128 $1, %xmm3, %ymm4, %ymm3
; NoVLX-NEXT: vmovdqa (%rsi), %ymm4
; NoVLX-NEXT: vpcmpgtw %ymm3, %ymm4, %ymm5
; NoVLX-NEXT: vmovdqa 32(%rsi), %ymm3
; NoVLX-NEXT: vpcmpgtw %ymm2, %ymm3, %ymm3
; NoVLX-NEXT: vpcmpeqd %ymm4, %ymm4, %ymm4
; NoVLX-NEXT: vpxor %ymm4, %ymm5, %ymm2
; NoVLX-NEXT: vpmovsxwd %ymm2, %zmm2
; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2
; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm2
; NoVLX-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm2, %xmm2
; NoVLX-NEXT: vpxor %ymm4, %ymm3, %ymm3
; NoVLX-NEXT: vpmovsxwd %ymm3, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm3
; NoVLX-NEXT: vptestmd %zmm3, %zmm3, %k0
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm3
; NoVLX-NEXT: vpinsrb $1, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $2, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $3, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $6, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $9, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $10, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $13, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $14, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm3, %xmm3
; NoVLX-NEXT: vpand %xmm1, %xmm3, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpand %xmm0, %xmm2, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %ecx
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax
; NoVLX-NEXT: shlq $32, %rax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <32 x i16>
%load = load <8 x i64>, <8 x i64>* %__b
%1 = bitcast <8 x i64> %load to <32 x i16>
%2 = icmp sge <32 x i16> %0, %1
%3 = bitcast i32 %__u to <32 x i1>
%4 = and <32 x i1> %2, %3
%5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
define zeroext i8 @test_vpcmpsged_v4i1_v8i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsged_v4i1_v8i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpled %xmm0, %xmm1, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsged_v4i1_v8i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%1 = bitcast <2 x i64> %__b to <4 x i32>
%2 = icmp sge <4 x i32> %0, %1
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <8 x i1> %3 to i8
ret i8 %4
}
define zeroext i8 @test_vpcmpsged_v4i1_v8i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsged_v4i1_v8i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpnltd (%rdi), %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsged_v4i1_v8i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <4 x i32>
%2 = icmp sge <4 x i32> %0, %1
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <8 x i1> %3 to i8
ret i8 %4
}
define zeroext i8 @test_masked_vpcmpsged_v4i1_v8i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsged_v4i1_v8i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpled %xmm0, %xmm1, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v8i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
; NoVLX-NEXT: kshiftrw $15, %k3, %k3
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k3, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%1 = bitcast <2 x i64> %__b to <4 x i32>
%2 = icmp sge <4 x i32> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%4 = and <4 x i1> %2, %extract.i
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <8 x i1> %5 to i8
ret i8 %6
}
define zeroext i8 @test_masked_vpcmpsged_v4i1_v8i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsged_v4i1_v8i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpnltd (%rsi), %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v8i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
; NoVLX-NEXT: kshiftrw $15, %k3, %k3
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k3, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <4 x i32>
%2 = icmp sge <4 x i32> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%4 = and <4 x i1> %2, %extract.i
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <8 x i1> %5 to i8
ret i8 %6
}
define zeroext i8 @test_vpcmpsged_v4i1_v8i1_mask_mem_b(<2 x i64> %__a, i32* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsged_v4i1_v8i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpbroadcastd (%rdi), %xmm1
; VLX-NEXT: vpcmpled %xmm0, %xmm1, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsged_v4i1_v8i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1
; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%load = load i32, i32* %__b
%vec = insertelement <4 x i32> undef, i32 %load, i32 0
%1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
%2 = icmp sge <4 x i32> %0, %1
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <8 x i1> %3 to i8
ret i8 %4
}
define zeroext i8 @test_masked_vpcmpsged_v4i1_v8i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i32* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsged_v4i1_v8i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpbroadcastd (%rsi), %xmm1
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpled %xmm0, %xmm1, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v8i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1
; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
; NoVLX-NEXT: kshiftrw $15, %k3, %k3
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k3, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%load = load i32, i32* %__b
%vec = insertelement <4 x i32> undef, i32 %load, i32 0
%1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
%2 = icmp sge <4 x i32> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%4 = and <4 x i1> %extract.i, %2
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <8 x i1> %5 to i8
ret i8 %6
}
define zeroext i16 @test_vpcmpsged_v4i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsged_v4i1_v16i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpled %xmm0, %xmm1, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsged_v4i1_v16i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%1 = bitcast <2 x i64> %__b to <4 x i32>
%2 = icmp sge <4 x i32> %0, %1
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <16 x i1> %3 to i16
ret i16 %4
}
define zeroext i16 @test_vpcmpsged_v4i1_v16i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsged_v4i1_v16i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpnltd (%rdi), %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsged_v4i1_v16i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <4 x i32>
%2 = icmp sge <4 x i32> %0, %1
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <16 x i1> %3 to i16
ret i16 %4
}
define zeroext i16 @test_masked_vpcmpsged_v4i1_v16i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsged_v4i1_v16i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpled %xmm0, %xmm1, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v16i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
; NoVLX-NEXT: kshiftrw $15, %k3, %k3
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k3, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%1 = bitcast <2 x i64> %__b to <4 x i32>
%2 = icmp sge <4 x i32> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%4 = and <4 x i1> %2, %extract.i
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <16 x i1> %5 to i16
ret i16 %6
}
define zeroext i16 @test_masked_vpcmpsged_v4i1_v16i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsged_v4i1_v16i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpnltd (%rsi), %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v16i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
; NoVLX-NEXT: kshiftrw $15, %k3, %k3
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k3, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <4 x i32>
%2 = icmp sge <4 x i32> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%4 = and <4 x i1> %2, %extract.i
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <16 x i1> %5 to i16
ret i16 %6
}
define zeroext i16 @test_vpcmpsged_v4i1_v16i1_mask_mem_b(<2 x i64> %__a, i32* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsged_v4i1_v16i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpbroadcastd (%rdi), %xmm1
; VLX-NEXT: vpcmpled %xmm0, %xmm1, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsged_v4i1_v16i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1
; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%load = load i32, i32* %__b
%vec = insertelement <4 x i32> undef, i32 %load, i32 0
%1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
%2 = icmp sge <4 x i32> %0, %1
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <16 x i1> %3 to i16
ret i16 %4
}
define zeroext i16 @test_masked_vpcmpsged_v4i1_v16i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i32* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsged_v4i1_v16i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpbroadcastd (%rsi), %xmm1
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpled %xmm0, %xmm1, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v16i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1
; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
; NoVLX-NEXT: kshiftrw $15, %k3, %k3
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k3, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%load = load i32, i32* %__b
%vec = insertelement <4 x i32> undef, i32 %load, i32 0
%1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
%2 = icmp sge <4 x i32> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%4 = and <4 x i1> %extract.i, %2
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <16 x i1> %5 to i16
ret i16 %6
}
define zeroext i32 @test_vpcmpsged_v4i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsged_v4i1_v32i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpled %xmm0, %xmm1, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsged_v4i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1080:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1081:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1082:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%1 = bitcast <2 x i64> %__b to <4 x i32>
%2 = icmp sge <4 x i32> %0, %1
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
define zeroext i32 @test_vpcmpsged_v4i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsged_v4i1_v32i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpnltd (%rdi), %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsged_v4i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1083:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1084:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1085:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <4 x i32>
%2 = icmp sge <4 x i32> %0, %1
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
define zeroext i32 @test_masked_vpcmpsged_v4i1_v32i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsged_v4i1_v32i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpled %xmm0, %xmm1, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1086:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1087:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1088:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
; NoVLX-NEXT: kshiftrw $15, %k3, %k3
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k3, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%1 = bitcast <2 x i64> %__b to <4 x i32>
%2 = icmp sge <4 x i32> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%4 = and <4 x i1> %2, %extract.i
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
define zeroext i32 @test_masked_vpcmpsged_v4i1_v32i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsged_v4i1_v32i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpnltd (%rsi), %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1089:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1090:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1091:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
; NoVLX-NEXT: kshiftrw $15, %k3, %k3
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k3, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <4 x i32>
%2 = icmp sge <4 x i32> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%4 = and <4 x i1> %2, %extract.i
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
define zeroext i32 @test_vpcmpsged_v4i1_v32i1_mask_mem_b(<2 x i64> %__a, i32* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsged_v4i1_v32i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpbroadcastd (%rdi), %xmm1
; VLX-NEXT: vpcmpled %xmm0, %xmm1, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsged_v4i1_v32i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1092:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1093:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1094:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1
; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%load = load i32, i32* %__b
%vec = insertelement <4 x i32> undef, i32 %load, i32 0
%1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
%2 = icmp sge <4 x i32> %0, %1
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
define zeroext i32 @test_masked_vpcmpsged_v4i1_v32i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i32* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsged_v4i1_v32i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpbroadcastd (%rsi), %xmm1
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpled %xmm0, %xmm1, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v32i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1095:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1096:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1097:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1
; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
; NoVLX-NEXT: kshiftrw $15, %k3, %k3
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k3, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%load = load i32, i32* %__b
%vec = insertelement <4 x i32> undef, i32 %load, i32 0
%1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
%2 = icmp sge <4 x i32> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%4 = and <4 x i1> %extract.i, %2
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
define zeroext i64 @test_vpcmpsged_v4i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsged_v4i1_v64i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpled %xmm0, %xmm1, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsged_v4i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1098:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1099:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1100:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%1 = bitcast <2 x i64> %__b to <4 x i32>
%2 = icmp sge <4 x i32> %0, %1
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
define zeroext i64 @test_vpcmpsged_v4i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsged_v4i1_v64i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpnltd (%rdi), %xmm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsged_v4i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1101:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1102:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1103:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <4 x i32>
%2 = icmp sge <4 x i32> %0, %1
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
define zeroext i64 @test_masked_vpcmpsged_v4i1_v64i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsged_v4i1_v64i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpled %xmm0, %xmm1, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1104:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1105:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1106:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
; NoVLX-NEXT: kshiftrw $15, %k3, %k3
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k3, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%1 = bitcast <2 x i64> %__b to <4 x i32>
%2 = icmp sge <4 x i32> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%4 = and <4 x i1> %2, %extract.i
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
define zeroext i64 @test_masked_vpcmpsged_v4i1_v64i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsged_v4i1_v64i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpnltd (%rsi), %xmm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1107:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1108:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1109:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
; NoVLX-NEXT: kshiftrw $15, %k3, %k3
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k3, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <4 x i32>
%2 = icmp sge <4 x i32> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%4 = and <4 x i1> %2, %extract.i
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
define zeroext i64 @test_vpcmpsged_v4i1_v64i1_mask_mem_b(<2 x i64> %__a, i32* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsged_v4i1_v64i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpbroadcastd (%rdi), %xmm1
; VLX-NEXT: vpcmpled %xmm0, %xmm1, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsged_v4i1_v64i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1110:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1111:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1112:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1
; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%load = load i32, i32* %__b
%vec = insertelement <4 x i32> undef, i32 %load, i32 0
%1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
%2 = icmp sge <4 x i32> %0, %1
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
define zeroext i64 @test_masked_vpcmpsged_v4i1_v64i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i32* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsged_v4i1_v64i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpbroadcastd (%rsi), %xmm1
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpled %xmm0, %xmm1, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v64i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1113:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1114:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1115:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1
; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
; NoVLX-NEXT: kshiftrw $15, %k3, %k3
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k3, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%load = load i32, i32* %__b
%vec = insertelement <4 x i32> undef, i32 %load, i32 0
%1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
%2 = icmp sge <4 x i32> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%4 = and <4 x i1> %extract.i, %2
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
define zeroext i16 @test_vpcmpsged_v8i1_v16i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsged_v8i1_v16i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpled %ymm0, %ymm1, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsged_v8i1_v16i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: # kill: %YMM1<def> %YMM1<kill> %ZMM1<def>
; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $8, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
%1 = bitcast <4 x i64> %__b to <8 x i32>
%2 = icmp sge <8 x i32> %0, %1
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%4 = bitcast <16 x i1> %3 to i16
ret i16 %4
}
define zeroext i16 @test_vpcmpsged_v8i1_v16i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsged_v8i1_v16i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpnltd (%rdi), %ymm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsged_v8i1_v16i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $8, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
%load = load <4 x i64>, <4 x i64>* %__b
%1 = bitcast <4 x i64> %load to <8 x i32>
%2 = icmp sge <8 x i32> %0, %1
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%4 = bitcast <16 x i1> %3 to i16
ret i16 %4
}
define zeroext i16 @test_masked_vpcmpsged_v8i1_v16i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsged_v8i1_v16i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpled %ymm0, %ymm1, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsged_v8i1_v16i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: # kill: %YMM1<def> %YMM1<kill> %ZMM1<def>
; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 {%k1}
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $8, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
%1 = bitcast <4 x i64> %__b to <8 x i32>
%2 = icmp sge <8 x i32> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %2, %3
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%6 = bitcast <16 x i1> %5 to i16
ret i16 %6
}
define zeroext i16 @test_masked_vpcmpsged_v8i1_v16i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsged_v8i1_v16i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpnltd (%rsi), %ymm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsged_v8i1_v16i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 {%k1}
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $8, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
%load = load <4 x i64>, <4 x i64>* %__b
%1 = bitcast <4 x i64> %load to <8 x i32>
%2 = icmp sge <8 x i32> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %2, %3
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%6 = bitcast <16 x i1> %5 to i16
ret i16 %6
}
define zeroext i16 @test_vpcmpsged_v8i1_v16i1_mask_mem_b(<4 x i64> %__a, i32* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsged_v8i1_v16i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpbroadcastd (%rdi), %ymm1
; VLX-NEXT: vpcmpled %ymm0, %ymm1, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsged_v8i1_v16i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; NoVLX-NEXT: vpbroadcastd (%rdi), %ymm1
; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $8, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
%load = load i32, i32* %__b
%vec = insertelement <8 x i32> undef, i32 %load, i32 0
%1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
%2 = icmp sge <8 x i32> %0, %1
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%4 = bitcast <16 x i1> %3 to i16
ret i16 %4
}
define zeroext i16 @test_masked_vpcmpsged_v8i1_v16i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i32* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsged_v8i1_v16i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpbroadcastd (%rsi), %ymm1
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpled %ymm0, %ymm1, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsged_v8i1_v16i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; NoVLX-NEXT: vpbroadcastd (%rsi), %ymm1
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 {%k1}
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $8, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
%load = load i32, i32* %__b
%vec = insertelement <8 x i32> undef, i32 %load, i32 0
%1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
%2 = icmp sge <8 x i32> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %3, %2
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%6 = bitcast <16 x i1> %5 to i16
ret i16 %6
}
define zeroext i32 @test_vpcmpsged_v8i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsged_v8i1_v32i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpled %ymm0, %ymm1, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsged_v8i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1116:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1117:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1118:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: # kill: %YMM1<def> %YMM1<kill> %ZMM1<def>
; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
%1 = bitcast <4 x i64> %__b to <8 x i32>
%2 = icmp sge <8 x i32> %0, %1
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
define zeroext i32 @test_vpcmpsged_v8i1_v32i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsged_v8i1_v32i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpnltd (%rdi), %ymm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsged_v8i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1119:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1120:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1121:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
%load = load <4 x i64>, <4 x i64>* %__b
%1 = bitcast <4 x i64> %load to <8 x i32>
%2 = icmp sge <8 x i32> %0, %1
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
define zeroext i32 @test_masked_vpcmpsged_v8i1_v32i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsged_v8i1_v32i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpled %ymm0, %ymm1, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsged_v8i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1122:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1123:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1124:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: # kill: %YMM1<def> %YMM1<kill> %ZMM1<def>
; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
%1 = bitcast <4 x i64> %__b to <8 x i32>
%2 = icmp sge <8 x i32> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %2, %3
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
define zeroext i32 @test_masked_vpcmpsged_v8i1_v32i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsged_v8i1_v32i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpnltd (%rsi), %ymm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsged_v8i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1125:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1126:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1127:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
%load = load <4 x i64>, <4 x i64>* %__b
%1 = bitcast <4 x i64> %load to <8 x i32>
%2 = icmp sge <8 x i32> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %2, %3
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
define zeroext i32 @test_vpcmpsged_v8i1_v32i1_mask_mem_b(<4 x i64> %__a, i32* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsged_v8i1_v32i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpbroadcastd (%rdi), %ymm1
; VLX-NEXT: vpcmpled %ymm0, %ymm1, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsged_v8i1_v32i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1128:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1129:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1130:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; NoVLX-NEXT: vpbroadcastd (%rdi), %ymm1
; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
%load = load i32, i32* %__b
%vec = insertelement <8 x i32> undef, i32 %load, i32 0
%1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
%2 = icmp sge <8 x i32> %0, %1
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
define zeroext i32 @test_masked_vpcmpsged_v8i1_v32i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i32* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsged_v8i1_v32i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpbroadcastd (%rsi), %ymm1
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpled %ymm0, %ymm1, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsged_v8i1_v32i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1131:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1132:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1133:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; NoVLX-NEXT: vpbroadcastd (%rsi), %ymm1
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
%load = load i32, i32* %__b
%vec = insertelement <8 x i32> undef, i32 %load, i32 0
%1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
%2 = icmp sge <8 x i32> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %3, %2
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
define zeroext i64 @test_vpcmpsged_v8i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsged_v8i1_v64i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpled %ymm0, %ymm1, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsged_v8i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1134:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1135:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1136:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: # kill: %YMM1<def> %YMM1<kill> %ZMM1<def>
; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
%1 = bitcast <4 x i64> %__b to <8 x i32>
%2 = icmp sge <8 x i32> %0, %1
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
define zeroext i64 @test_vpcmpsged_v8i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsged_v8i1_v64i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpnltd (%rdi), %ymm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsged_v8i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1137:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1138:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1139:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
%load = load <4 x i64>, <4 x i64>* %__b
%1 = bitcast <4 x i64> %load to <8 x i32>
%2 = icmp sge <8 x i32> %0, %1
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
define zeroext i64 @test_masked_vpcmpsged_v8i1_v64i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsged_v8i1_v64i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpled %ymm0, %ymm1, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsged_v8i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1140:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1141:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1142:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: # kill: %YMM1<def> %YMM1<kill> %ZMM1<def>
; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
%1 = bitcast <4 x i64> %__b to <8 x i32>
%2 = icmp sge <8 x i32> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %2, %3
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
define zeroext i64 @test_masked_vpcmpsged_v8i1_v64i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsged_v8i1_v64i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpnltd (%rsi), %ymm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsged_v8i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1143:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1144:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1145:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
%load = load <4 x i64>, <4 x i64>* %__b
%1 = bitcast <4 x i64> %load to <8 x i32>
%2 = icmp sge <8 x i32> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %2, %3
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
define zeroext i64 @test_vpcmpsged_v8i1_v64i1_mask_mem_b(<4 x i64> %__a, i32* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsged_v8i1_v64i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpbroadcastd (%rdi), %ymm1
; VLX-NEXT: vpcmpled %ymm0, %ymm1, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsged_v8i1_v64i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1146:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1147:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1148:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; NoVLX-NEXT: vpbroadcastd (%rdi), %ymm1
; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
%load = load i32, i32* %__b
%vec = insertelement <8 x i32> undef, i32 %load, i32 0
%1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
%2 = icmp sge <8 x i32> %0, %1
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
define zeroext i64 @test_masked_vpcmpsged_v8i1_v64i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i32* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsged_v8i1_v64i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpbroadcastd (%rsi), %ymm1
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpled %ymm0, %ymm1, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsged_v8i1_v64i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1149:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1150:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1151:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; NoVLX-NEXT: vpbroadcastd (%rsi), %ymm1
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
%load = load i32, i32* %__b
%vec = insertelement <8 x i32> undef, i32 %load, i32 0
%1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
%2 = icmp sge <8 x i32> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %3, %2
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
define zeroext i32 @test_vpcmpsged_v16i1_v32i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsged_v16i1_v32i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpled %zmm0, %zmm1, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsged_v16i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1152:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1153:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1154:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %r13
; NoVLX-NEXT: pushq %r12
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: .Lcfi1155:
; NoVLX-NEXT: .cfi_offset %rbx, -56
; NoVLX-NEXT: .Lcfi1156:
; NoVLX-NEXT: .cfi_offset %r12, -48
; NoVLX-NEXT: .Lcfi1157:
; NoVLX-NEXT: .cfi_offset %r13, -40
; NoVLX-NEXT: .Lcfi1158:
; NoVLX-NEXT: .cfi_offset %r14, -32
; NoVLX-NEXT: .Lcfi1159:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: leaq -40(%rbp), %rsp
; NoVLX-NEXT: popq %rbx
; NoVLX-NEXT: popq %r12
; NoVLX-NEXT: popq %r13
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x i32>
%1 = bitcast <8 x i64> %__b to <16 x i32>
%2 = icmp sge <16 x i32> %0, %1
%3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
define zeroext i32 @test_vpcmpsged_v16i1_v32i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsged_v16i1_v32i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpnltd (%rdi), %zmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsged_v16i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1160:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1161:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1162:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %r13
; NoVLX-NEXT: pushq %r12
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: .Lcfi1163:
; NoVLX-NEXT: .cfi_offset %rbx, -56
; NoVLX-NEXT: .Lcfi1164:
; NoVLX-NEXT: .cfi_offset %r12, -48
; NoVLX-NEXT: .Lcfi1165:
; NoVLX-NEXT: .cfi_offset %r13, -40
; NoVLX-NEXT: .Lcfi1166:
; NoVLX-NEXT: .cfi_offset %r14, -32
; NoVLX-NEXT: .Lcfi1167:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpnltd (%rdi), %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: leaq -40(%rbp), %rsp
; NoVLX-NEXT: popq %rbx
; NoVLX-NEXT: popq %r12
; NoVLX-NEXT: popq %r13
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x i32>
%load = load <8 x i64>, <8 x i64>* %__b
%1 = bitcast <8 x i64> %load to <16 x i32>
%2 = icmp sge <16 x i32> %0, %1
%3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
define zeroext i32 @test_masked_vpcmpsged_v16i1_v32i1_mask(i16 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsged_v16i1_v32i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsged_v16i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1168:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1169:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1170:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %r13
; NoVLX-NEXT: pushq %r12
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: .Lcfi1171:
; NoVLX-NEXT: .cfi_offset %rbx, -56
; NoVLX-NEXT: .Lcfi1172:
; NoVLX-NEXT: .cfi_offset %r12, -48
; NoVLX-NEXT: .Lcfi1173:
; NoVLX-NEXT: .cfi_offset %r13, -40
; NoVLX-NEXT: .Lcfi1174:
; NoVLX-NEXT: .cfi_offset %r14, -32
; NoVLX-NEXT: .Lcfi1175:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: leaq -40(%rbp), %rsp
; NoVLX-NEXT: popq %rbx
; NoVLX-NEXT: popq %r12
; NoVLX-NEXT: popq %r13
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x i32>
%1 = bitcast <8 x i64> %__b to <16 x i32>
%2 = icmp sge <16 x i32> %0, %1
%3 = bitcast i16 %__u to <16 x i1>
%4 = and <16 x i1> %2, %3
%5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
define zeroext i32 @test_masked_vpcmpsged_v16i1_v32i1_mask_mem(i16 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsged_v16i1_v32i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpnltd (%rsi), %zmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsged_v16i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1176:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1177:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1178:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %r13
; NoVLX-NEXT: pushq %r12
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: .Lcfi1179:
; NoVLX-NEXT: .cfi_offset %rbx, -56
; NoVLX-NEXT: .Lcfi1180:
; NoVLX-NEXT: .cfi_offset %r12, -48
; NoVLX-NEXT: .Lcfi1181:
; NoVLX-NEXT: .cfi_offset %r13, -40
; NoVLX-NEXT: .Lcfi1182:
; NoVLX-NEXT: .cfi_offset %r14, -32
; NoVLX-NEXT: .Lcfi1183:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpnltd (%rsi), %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: leaq -40(%rbp), %rsp
; NoVLX-NEXT: popq %rbx
; NoVLX-NEXT: popq %r12
; NoVLX-NEXT: popq %r13
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x i32>
%load = load <8 x i64>, <8 x i64>* %__b
%1 = bitcast <8 x i64> %load to <16 x i32>
%2 = icmp sge <16 x i32> %0, %1
%3 = bitcast i16 %__u to <16 x i1>
%4 = and <16 x i1> %2, %3
%5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
define zeroext i32 @test_vpcmpsged_v16i1_v32i1_mask_mem_b(<8 x i64> %__a, i32* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsged_v16i1_v32i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpbroadcastd (%rdi), %zmm1
; VLX-NEXT: vpcmpled %zmm0, %zmm1, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsged_v16i1_v32i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1184:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1185:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1186:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %r13
; NoVLX-NEXT: pushq %r12
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: .Lcfi1187:
; NoVLX-NEXT: .cfi_offset %rbx, -56
; NoVLX-NEXT: .Lcfi1188:
; NoVLX-NEXT: .cfi_offset %r12, -48
; NoVLX-NEXT: .Lcfi1189:
; NoVLX-NEXT: .cfi_offset %r13, -40
; NoVLX-NEXT: .Lcfi1190:
; NoVLX-NEXT: .cfi_offset %r14, -32
; NoVLX-NEXT: .Lcfi1191:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpbroadcastd (%rdi), %zmm1
; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: leaq -40(%rbp), %rsp
; NoVLX-NEXT: popq %rbx
; NoVLX-NEXT: popq %r12
; NoVLX-NEXT: popq %r13
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x i32>
%load = load i32, i32* %__b
%vec = insertelement <16 x i32> undef, i32 %load, i32 0
%1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
%2 = icmp sge <16 x i32> %0, %1
%3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
define zeroext i32 @test_masked_vpcmpsged_v16i1_v32i1_mask_mem_b(i16 zeroext %__u, <8 x i64> %__a, i32* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsged_v16i1_v32i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpbroadcastd (%rsi), %zmm1
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsged_v16i1_v32i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1192:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1193:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1194:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %r13
; NoVLX-NEXT: pushq %r12
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: .Lcfi1195:
; NoVLX-NEXT: .cfi_offset %rbx, -56
; NoVLX-NEXT: .Lcfi1196:
; NoVLX-NEXT: .cfi_offset %r12, -48
; NoVLX-NEXT: .Lcfi1197:
; NoVLX-NEXT: .cfi_offset %r13, -40
; NoVLX-NEXT: .Lcfi1198:
; NoVLX-NEXT: .cfi_offset %r14, -32
; NoVLX-NEXT: .Lcfi1199:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpbroadcastd (%rsi), %zmm1
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: leaq -40(%rbp), %rsp
; NoVLX-NEXT: popq %rbx
; NoVLX-NEXT: popq %r12
; NoVLX-NEXT: popq %r13
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x i32>
%load = load i32, i32* %__b
%vec = insertelement <16 x i32> undef, i32 %load, i32 0
%1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
%2 = icmp sge <16 x i32> %0, %1
%3 = bitcast i16 %__u to <16 x i1>
%4 = and <16 x i1> %3, %2
%5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
define zeroext i64 @test_vpcmpsged_v16i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsged_v16i1_v64i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpled %zmm0, %zmm1, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsged_v16i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1200:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1201:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1202:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %r13
; NoVLX-NEXT: pushq %r12
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: .Lcfi1203:
; NoVLX-NEXT: .cfi_offset %rbx, -56
; NoVLX-NEXT: .Lcfi1204:
; NoVLX-NEXT: .cfi_offset %r12, -48
; NoVLX-NEXT: .Lcfi1205:
; NoVLX-NEXT: .cfi_offset %r13, -40
; NoVLX-NEXT: .Lcfi1206:
; NoVLX-NEXT: .cfi_offset %r14, -32
; NoVLX-NEXT: .Lcfi1207:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: leaq -40(%rbp), %rsp
; NoVLX-NEXT: popq %rbx
; NoVLX-NEXT: popq %r12
; NoVLX-NEXT: popq %r13
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x i32>
%1 = bitcast <8 x i64> %__b to <16 x i32>
%2 = icmp sge <16 x i32> %0, %1
%3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
define zeroext i64 @test_vpcmpsged_v16i1_v64i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsged_v16i1_v64i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpnltd (%rdi), %zmm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsged_v16i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1208:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1209:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1210:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %r13
; NoVLX-NEXT: pushq %r12
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: .Lcfi1211:
; NoVLX-NEXT: .cfi_offset %rbx, -56
; NoVLX-NEXT: .Lcfi1212:
; NoVLX-NEXT: .cfi_offset %r12, -48
; NoVLX-NEXT: .Lcfi1213:
; NoVLX-NEXT: .cfi_offset %r13, -40
; NoVLX-NEXT: .Lcfi1214:
; NoVLX-NEXT: .cfi_offset %r14, -32
; NoVLX-NEXT: .Lcfi1215:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpnltd (%rdi), %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: leaq -40(%rbp), %rsp
; NoVLX-NEXT: popq %rbx
; NoVLX-NEXT: popq %r12
; NoVLX-NEXT: popq %r13
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x i32>
%load = load <8 x i64>, <8 x i64>* %__b
%1 = bitcast <8 x i64> %load to <16 x i32>
%2 = icmp sge <16 x i32> %0, %1
%3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
define zeroext i64 @test_masked_vpcmpsged_v16i1_v64i1_mask(i16 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsged_v16i1_v64i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsged_v16i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1216:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1217:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1218:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %r13
; NoVLX-NEXT: pushq %r12
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: .Lcfi1219:
; NoVLX-NEXT: .cfi_offset %rbx, -56
; NoVLX-NEXT: .Lcfi1220:
; NoVLX-NEXT: .cfi_offset %r12, -48
; NoVLX-NEXT: .Lcfi1221:
; NoVLX-NEXT: .cfi_offset %r13, -40
; NoVLX-NEXT: .Lcfi1222:
; NoVLX-NEXT: .cfi_offset %r14, -32
; NoVLX-NEXT: .Lcfi1223:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: leaq -40(%rbp), %rsp
; NoVLX-NEXT: popq %rbx
; NoVLX-NEXT: popq %r12
; NoVLX-NEXT: popq %r13
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x i32>
%1 = bitcast <8 x i64> %__b to <16 x i32>
%2 = icmp sge <16 x i32> %0, %1
%3 = bitcast i16 %__u to <16 x i1>
%4 = and <16 x i1> %2, %3
%5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
define zeroext i64 @test_masked_vpcmpsged_v16i1_v64i1_mask_mem(i16 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsged_v16i1_v64i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpnltd (%rsi), %zmm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsged_v16i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1224:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1225:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1226:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %r13
; NoVLX-NEXT: pushq %r12
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: .Lcfi1227:
; NoVLX-NEXT: .cfi_offset %rbx, -56
; NoVLX-NEXT: .Lcfi1228:
; NoVLX-NEXT: .cfi_offset %r12, -48
; NoVLX-NEXT: .Lcfi1229:
; NoVLX-NEXT: .cfi_offset %r13, -40
; NoVLX-NEXT: .Lcfi1230:
; NoVLX-NEXT: .cfi_offset %r14, -32
; NoVLX-NEXT: .Lcfi1231:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpnltd (%rsi), %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: leaq -40(%rbp), %rsp
; NoVLX-NEXT: popq %rbx
; NoVLX-NEXT: popq %r12
; NoVLX-NEXT: popq %r13
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x i32>
%load = load <8 x i64>, <8 x i64>* %__b
%1 = bitcast <8 x i64> %load to <16 x i32>
%2 = icmp sge <16 x i32> %0, %1
%3 = bitcast i16 %__u to <16 x i1>
%4 = and <16 x i1> %2, %3
%5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
define zeroext i64 @test_vpcmpsged_v16i1_v64i1_mask_mem_b(<8 x i64> %__a, i32* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsged_v16i1_v64i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpbroadcastd (%rdi), %zmm1
; VLX-NEXT: vpcmpled %zmm0, %zmm1, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsged_v16i1_v64i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1232:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1233:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1234:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %r13
; NoVLX-NEXT: pushq %r12
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: .Lcfi1235:
; NoVLX-NEXT: .cfi_offset %rbx, -56
; NoVLX-NEXT: .Lcfi1236:
; NoVLX-NEXT: .cfi_offset %r12, -48
; NoVLX-NEXT: .Lcfi1237:
; NoVLX-NEXT: .cfi_offset %r13, -40
; NoVLX-NEXT: .Lcfi1238:
; NoVLX-NEXT: .cfi_offset %r14, -32
; NoVLX-NEXT: .Lcfi1239:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpbroadcastd (%rdi), %zmm1
; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: leaq -40(%rbp), %rsp
; NoVLX-NEXT: popq %rbx
; NoVLX-NEXT: popq %r12
; NoVLX-NEXT: popq %r13
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x i32>
%load = load i32, i32* %__b
%vec = insertelement <16 x i32> undef, i32 %load, i32 0
%1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
%2 = icmp sge <16 x i32> %0, %1
%3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
define zeroext i64 @test_masked_vpcmpsged_v16i1_v64i1_mask_mem_b(i16 zeroext %__u, <8 x i64> %__a, i32* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsged_v16i1_v64i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpbroadcastd (%rsi), %zmm1
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsged_v16i1_v64i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1240:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1241:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1242:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %r13
; NoVLX-NEXT: pushq %r12
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: .Lcfi1243:
; NoVLX-NEXT: .cfi_offset %rbx, -56
; NoVLX-NEXT: .Lcfi1244:
; NoVLX-NEXT: .cfi_offset %r12, -48
; NoVLX-NEXT: .Lcfi1245:
; NoVLX-NEXT: .cfi_offset %r13, -40
; NoVLX-NEXT: .Lcfi1246:
; NoVLX-NEXT: .cfi_offset %r14, -32
; NoVLX-NEXT: .Lcfi1247:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpbroadcastd (%rsi), %zmm1
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: leaq -40(%rbp), %rsp
; NoVLX-NEXT: popq %rbx
; NoVLX-NEXT: popq %r12
; NoVLX-NEXT: popq %r13
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x i32>
%load = load i32, i32* %__b
%vec = insertelement <16 x i32> undef, i32 %load, i32 0
%1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
%2 = icmp sge <16 x i32> %0, %1
%3 = bitcast i16 %__u to <16 x i1>
%4 = and <16 x i1> %3, %2
%5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
define zeroext i4 @test_vpcmpsgeq_v2i1_v4i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgeq_v2i1_v4i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpleq %xmm0, %xmm1, %k0
; VLX-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp)
; VLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v4i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%1 = bitcast <2 x i64> %__b to <2 x i64>
%2 = icmp sge <2 x i64> %0, %1
%3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%4 = bitcast <4 x i1> %3 to i4
ret i4 %4
}
define zeroext i4 @test_vpcmpsgeq_v2i1_v4i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgeq_v2i1_v4i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpnltq (%rdi), %xmm0, %k0
; VLX-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp)
; VLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v4i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <2 x i64>
%2 = icmp sge <2 x i64> %0, %1
%3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%4 = bitcast <4 x i1> %3 to i4
ret i4 %4
}
define zeroext i4 @test_masked_vpcmpsgeq_v2i1_v4i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgeq_v2i1_v4i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpleq %xmm0, %xmm1, %k0 {%k1}
; VLX-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp)
; VLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v4i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%1 = bitcast <2 x i64> %__b to <2 x i64>
%2 = icmp sge <2 x i64> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
%4 = and <2 x i1> %2, %extract.i
%5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%6 = bitcast <4 x i1> %5 to i4
ret i4 %6
}
define zeroext i4 @test_masked_vpcmpsgeq_v2i1_v4i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgeq_v2i1_v4i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpnltq (%rsi), %xmm0, %k0 {%k1}
; VLX-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp)
; VLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v4i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <2 x i64>
%2 = icmp sge <2 x i64> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
%4 = and <2 x i1> %2, %extract.i
%5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%6 = bitcast <4 x i1> %5 to i4
ret i4 %6
}
define zeroext i4 @test_vpcmpsgeq_v2i1_v4i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgeq_v2i1_v4i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpbroadcastq (%rdi), %xmm1
; VLX-NEXT: vpcmpleq %xmm0, %xmm1, %k0
; VLX-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp)
; VLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v4i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1
; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%load = load i64, i64* %__b
%vec = insertelement <2 x i64> undef, i64 %load, i32 0
%1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
%2 = icmp sge <2 x i64> %0, %1
%3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%4 = bitcast <4 x i1> %3 to i4
ret i4 %4
}
define zeroext i4 @test_masked_vpcmpsgeq_v2i1_v4i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgeq_v2i1_v4i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpbroadcastq (%rsi), %xmm1
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpleq %xmm0, %xmm1, %k0 {%k1}
; VLX-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp)
; VLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v4i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1
; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%load = load i64, i64* %__b
%vec = insertelement <2 x i64> undef, i64 %load, i32 0
%1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
%2 = icmp sge <2 x i64> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
%4 = and <2 x i1> %extract.i, %2
%5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%6 = bitcast <4 x i1> %5 to i4
ret i4 %6
}
define zeroext i8 @test_vpcmpsgeq_v2i1_v8i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgeq_v2i1_v8i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpleq %xmm0, %xmm1, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v8i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,8,2,3,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%1 = bitcast <2 x i64> %__b to <2 x i64>
%2 = icmp sge <2 x i64> %0, %1
%3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%4 = bitcast <8 x i1> %3 to i8
ret i8 %4
}
define zeroext i8 @test_vpcmpsgeq_v2i1_v8i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgeq_v2i1_v8i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpnltq (%rdi), %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v8i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,8,2,3,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <2 x i64>
%2 = icmp sge <2 x i64> %0, %1
%3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%4 = bitcast <8 x i1> %3 to i8
ret i8 %4
}
define zeroext i8 @test_masked_vpcmpsgeq_v2i1_v8i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgeq_v2i1_v8i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpleq %xmm0, %xmm1, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v8i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,8,2,3,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%1 = bitcast <2 x i64> %__b to <2 x i64>
%2 = icmp sge <2 x i64> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
%4 = and <2 x i1> %2, %extract.i
%5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%6 = bitcast <8 x i1> %5 to i8
ret i8 %6
}
define zeroext i8 @test_masked_vpcmpsgeq_v2i1_v8i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgeq_v2i1_v8i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpnltq (%rsi), %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v8i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,8,2,3,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <2 x i64>
%2 = icmp sge <2 x i64> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
%4 = and <2 x i1> %2, %extract.i
%5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%6 = bitcast <8 x i1> %5 to i8
ret i8 %6
}
define zeroext i8 @test_vpcmpsgeq_v2i1_v8i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgeq_v2i1_v8i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpbroadcastq (%rdi), %xmm1
; VLX-NEXT: vpcmpleq %xmm0, %xmm1, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v8i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1
; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,8,2,3,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%load = load i64, i64* %__b
%vec = insertelement <2 x i64> undef, i64 %load, i32 0
%1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
%2 = icmp sge <2 x i64> %0, %1
%3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%4 = bitcast <8 x i1> %3 to i8
ret i8 %4
}
define zeroext i8 @test_masked_vpcmpsgeq_v2i1_v8i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgeq_v2i1_v8i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpbroadcastq (%rsi), %xmm1
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpleq %xmm0, %xmm1, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v8i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1
; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,8,2,3,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%load = load i64, i64* %__b
%vec = insertelement <2 x i64> undef, i64 %load, i32 0
%1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
%2 = icmp sge <2 x i64> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
%4 = and <2 x i1> %extract.i, %2
%5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%6 = bitcast <8 x i1> %5 to i8
ret i8 %6
}
define zeroext i16 @test_vpcmpsgeq_v2i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgeq_v2i1_v16i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpleq %xmm0, %xmm1, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v16i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm1, %zmm0, %zmm2
; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%1 = bitcast <2 x i64> %__b to <2 x i64>
%2 = icmp sge <2 x i64> %0, %1
%3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%4 = bitcast <16 x i1> %3 to i16
ret i16 %4
}
define zeroext i16 @test_vpcmpsgeq_v2i1_v16i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgeq_v2i1_v16i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpnltq (%rdi), %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v16i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm1, %zmm0, %zmm2
; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <2 x i64>
%2 = icmp sge <2 x i64> %0, %1
%3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%4 = bitcast <16 x i1> %3 to i16
ret i16 %4
}
define zeroext i16 @test_masked_vpcmpsgeq_v2i1_v16i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgeq_v2i1_v16i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpleq %xmm0, %xmm1, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v16i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm1, %zmm0, %zmm2
; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%1 = bitcast <2 x i64> %__b to <2 x i64>
%2 = icmp sge <2 x i64> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
%4 = and <2 x i1> %2, %extract.i
%5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%6 = bitcast <16 x i1> %5 to i16
ret i16 %6
}
define zeroext i16 @test_masked_vpcmpsgeq_v2i1_v16i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgeq_v2i1_v16i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpnltq (%rsi), %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v16i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm1, %zmm0, %zmm2
; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <2 x i64>
%2 = icmp sge <2 x i64> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
%4 = and <2 x i1> %2, %extract.i
%5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%6 = bitcast <16 x i1> %5 to i16
ret i16 %6
}
define zeroext i16 @test_vpcmpsgeq_v2i1_v16i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgeq_v2i1_v16i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpbroadcastq (%rdi), %xmm1
; VLX-NEXT: vpcmpleq %xmm0, %xmm1, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v16i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1
; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm1, %zmm0, %zmm2
; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%load = load i64, i64* %__b
%vec = insertelement <2 x i64> undef, i64 %load, i32 0
%1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
%2 = icmp sge <2 x i64> %0, %1
%3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%4 = bitcast <16 x i1> %3 to i16
ret i16 %4
}
define zeroext i16 @test_masked_vpcmpsgeq_v2i1_v16i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgeq_v2i1_v16i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpbroadcastq (%rsi), %xmm1
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpleq %xmm0, %xmm1, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v16i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1
; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm1, %zmm0, %zmm2
; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%load = load i64, i64* %__b
%vec = insertelement <2 x i64> undef, i64 %load, i32 0
%1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
%2 = icmp sge <2 x i64> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
%4 = and <2 x i1> %extract.i, %2
%5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%6 = bitcast <16 x i1> %5 to i16
ret i16 %6
}
define zeroext i32 @test_vpcmpsgeq_v2i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgeq_v2i1_v32i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpleq %xmm0, %xmm1, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1248:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1249:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1250:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%1 = bitcast <2 x i64> %__b to <2 x i64>
%2 = icmp sge <2 x i64> %0, %1
%3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
define zeroext i32 @test_vpcmpsgeq_v2i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgeq_v2i1_v32i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpnltq (%rdi), %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1251:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1252:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1253:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <2 x i64>
%2 = icmp sge <2 x i64> %0, %1
%3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
define zeroext i32 @test_masked_vpcmpsgeq_v2i1_v32i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgeq_v2i1_v32i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpleq %xmm0, %xmm1, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1254:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1255:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1256:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%1 = bitcast <2 x i64> %__b to <2 x i64>
%2 = icmp sge <2 x i64> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
%4 = and <2 x i1> %2, %extract.i
%5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
define zeroext i32 @test_masked_vpcmpsgeq_v2i1_v32i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgeq_v2i1_v32i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpnltq (%rsi), %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1257:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1258:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1259:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <2 x i64>
%2 = icmp sge <2 x i64> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
%4 = and <2 x i1> %2, %extract.i
%5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
define zeroext i32 @test_vpcmpsgeq_v2i1_v32i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgeq_v2i1_v32i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpbroadcastq (%rdi), %xmm1
; VLX-NEXT: vpcmpleq %xmm0, %xmm1, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v32i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1260:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1261:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1262:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1
; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%load = load i64, i64* %__b
%vec = insertelement <2 x i64> undef, i64 %load, i32 0
%1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
%2 = icmp sge <2 x i64> %0, %1
%3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
define zeroext i32 @test_masked_vpcmpsgeq_v2i1_v32i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgeq_v2i1_v32i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpbroadcastq (%rsi), %xmm1
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpleq %xmm0, %xmm1, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v32i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1263:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1264:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1265:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1
; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%load = load i64, i64* %__b
%vec = insertelement <2 x i64> undef, i64 %load, i32 0
%1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
%2 = icmp sge <2 x i64> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
%4 = and <2 x i1> %extract.i, %2
%5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
define zeroext i64 @test_vpcmpsgeq_v2i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgeq_v2i1_v64i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpleq %xmm0, %xmm1, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1266:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1267:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1268:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%1 = bitcast <2 x i64> %__b to <2 x i64>
%2 = icmp sge <2 x i64> %0, %1
%3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
define zeroext i64 @test_vpcmpsgeq_v2i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgeq_v2i1_v64i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpnltq (%rdi), %xmm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1269:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1270:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1271:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <2 x i64>
%2 = icmp sge <2 x i64> %0, %1
%3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
define zeroext i64 @test_masked_vpcmpsgeq_v2i1_v64i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgeq_v2i1_v64i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpleq %xmm0, %xmm1, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1272:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1273:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1274:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%1 = bitcast <2 x i64> %__b to <2 x i64>
%2 = icmp sge <2 x i64> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
%4 = and <2 x i1> %2, %extract.i
%5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
define zeroext i64 @test_masked_vpcmpsgeq_v2i1_v64i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgeq_v2i1_v64i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpnltq (%rsi), %xmm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1275:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1276:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1277:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <2 x i64>
%2 = icmp sge <2 x i64> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
%4 = and <2 x i1> %2, %extract.i
%5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
define zeroext i64 @test_vpcmpsgeq_v2i1_v64i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgeq_v2i1_v64i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpbroadcastq (%rdi), %xmm1
; VLX-NEXT: vpcmpleq %xmm0, %xmm1, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v64i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1278:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1279:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1280:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1
; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%load = load i64, i64* %__b
%vec = insertelement <2 x i64> undef, i64 %load, i32 0
%1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
%2 = icmp sge <2 x i64> %0, %1
%3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
define zeroext i64 @test_masked_vpcmpsgeq_v2i1_v64i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgeq_v2i1_v64i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpbroadcastq (%rsi), %xmm1
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpleq %xmm0, %xmm1, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v64i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1281:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1282:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1283:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1
; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%load = load i64, i64* %__b
%vec = insertelement <2 x i64> undef, i64 %load, i32 0
%1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
%2 = icmp sge <2 x i64> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
%4 = and <2 x i1> %extract.i, %2
%5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
define zeroext i8 @test_vpcmpsgeq_v4i1_v8i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgeq_v4i1_v8i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpleq %ymm0, %ymm1, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgeq_v4i1_v8i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
%1 = bitcast <4 x i64> %__b to <4 x i64>
%2 = icmp sge <4 x i64> %0, %1
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <8 x i1> %3 to i8
ret i8 %4
}
define zeroext i8 @test_vpcmpsgeq_v4i1_v8i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgeq_v4i1_v8i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpnltq (%rdi), %ymm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgeq_v4i1_v8i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
%load = load <4 x i64>, <4 x i64>* %__b
%1 = bitcast <4 x i64> %load to <4 x i64>
%2 = icmp sge <4 x i64> %0, %1
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <8 x i1> %3 to i8
ret i8 %4
}
define zeroext i8 @test_masked_vpcmpsgeq_v4i1_v8i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgeq_v4i1_v8i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpleq %ymm0, %ymm1, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgeq_v4i1_v8i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
; NoVLX-NEXT: kshiftrw $15, %k3, %k3
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k3, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
%1 = bitcast <4 x i64> %__b to <4 x i64>
%2 = icmp sge <4 x i64> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%4 = and <4 x i1> %2, %extract.i
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <8 x i1> %5 to i8
ret i8 %6
}
define zeroext i8 @test_masked_vpcmpsgeq_v4i1_v8i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgeq_v4i1_v8i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpnltq (%rsi), %ymm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgeq_v4i1_v8i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
; NoVLX-NEXT: kshiftrw $15, %k3, %k3
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k3, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
%load = load <4 x i64>, <4 x i64>* %__b
%1 = bitcast <4 x i64> %load to <4 x i64>
%2 = icmp sge <4 x i64> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%4 = and <4 x i1> %2, %extract.i
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <8 x i1> %5 to i8
ret i8 %6
}
define zeroext i8 @test_vpcmpsgeq_v4i1_v8i1_mask_mem_b(<4 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgeq_v4i1_v8i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpbroadcastq (%rdi), %ymm1
; VLX-NEXT: vpcmpleq %ymm0, %ymm1, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgeq_v4i1_v8i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1
; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
%load = load i64, i64* %__b
%vec = insertelement <4 x i64> undef, i64 %load, i32 0
%1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
%2 = icmp sge <4 x i64> %0, %1
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <8 x i1> %3 to i8
ret i8 %4
}
define zeroext i8 @test_masked_vpcmpsgeq_v4i1_v8i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgeq_v4i1_v8i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpbroadcastq (%rsi), %ymm1
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpleq %ymm0, %ymm1, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgeq_v4i1_v8i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1
; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
; NoVLX-NEXT: kshiftrw $15, %k3, %k3
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k3, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
%load = load i64, i64* %__b
%vec = insertelement <4 x i64> undef, i64 %load, i32 0
%1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
%2 = icmp sge <4 x i64> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%4 = and <4 x i1> %extract.i, %2
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <8 x i1> %5 to i8
ret i8 %6
}
define zeroext i16 @test_vpcmpsgeq_v4i1_v16i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgeq_v4i1_v16i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpleq %ymm0, %ymm1, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgeq_v4i1_v16i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
%1 = bitcast <4 x i64> %__b to <4 x i64>
%2 = icmp sge <4 x i64> %0, %1
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <16 x i1> %3 to i16
ret i16 %4
}
define zeroext i16 @test_vpcmpsgeq_v4i1_v16i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgeq_v4i1_v16i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpnltq (%rdi), %ymm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgeq_v4i1_v16i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
%load = load <4 x i64>, <4 x i64>* %__b
%1 = bitcast <4 x i64> %load to <4 x i64>
%2 = icmp sge <4 x i64> %0, %1
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <16 x i1> %3 to i16
ret i16 %4
}
define zeroext i16 @test_masked_vpcmpsgeq_v4i1_v16i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgeq_v4i1_v16i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpleq %ymm0, %ymm1, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgeq_v4i1_v16i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
; NoVLX-NEXT: kshiftrw $15, %k3, %k3
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k3, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
%1 = bitcast <4 x i64> %__b to <4 x i64>
%2 = icmp sge <4 x i64> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%4 = and <4 x i1> %2, %extract.i
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <16 x i1> %5 to i16
ret i16 %6
}
define zeroext i16 @test_masked_vpcmpsgeq_v4i1_v16i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgeq_v4i1_v16i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpnltq (%rsi), %ymm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgeq_v4i1_v16i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
; NoVLX-NEXT: kshiftrw $15, %k3, %k3
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k3, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
%load = load <4 x i64>, <4 x i64>* %__b
%1 = bitcast <4 x i64> %load to <4 x i64>
%2 = icmp sge <4 x i64> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%4 = and <4 x i1> %2, %extract.i
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <16 x i1> %5 to i16
ret i16 %6
}
define zeroext i16 @test_vpcmpsgeq_v4i1_v16i1_mask_mem_b(<4 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgeq_v4i1_v16i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpbroadcastq (%rdi), %ymm1
; VLX-NEXT: vpcmpleq %ymm0, %ymm1, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgeq_v4i1_v16i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1
; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
%load = load i64, i64* %__b
%vec = insertelement <4 x i64> undef, i64 %load, i32 0
%1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
%2 = icmp sge <4 x i64> %0, %1
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <16 x i1> %3 to i16
ret i16 %4
}
define zeroext i16 @test_masked_vpcmpsgeq_v4i1_v16i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgeq_v4i1_v16i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpbroadcastq (%rsi), %ymm1
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpleq %ymm0, %ymm1, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgeq_v4i1_v16i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1
; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
; NoVLX-NEXT: kshiftrw $15, %k3, %k3
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k3, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
%load = load i64, i64* %__b
%vec = insertelement <4 x i64> undef, i64 %load, i32 0
%1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
%2 = icmp sge <4 x i64> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%4 = and <4 x i1> %extract.i, %2
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <16 x i1> %5 to i16
ret i16 %6
}
define zeroext i32 @test_vpcmpsgeq_v4i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgeq_v4i1_v32i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpleq %ymm0, %ymm1, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgeq_v4i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1284:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1285:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1286:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
%1 = bitcast <4 x i64> %__b to <4 x i64>
%2 = icmp sge <4 x i64> %0, %1
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
define zeroext i32 @test_vpcmpsgeq_v4i1_v32i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgeq_v4i1_v32i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpnltq (%rdi), %ymm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgeq_v4i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1287:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1288:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1289:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
%load = load <4 x i64>, <4 x i64>* %__b
%1 = bitcast <4 x i64> %load to <4 x i64>
%2 = icmp sge <4 x i64> %0, %1
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
define zeroext i32 @test_masked_vpcmpsgeq_v4i1_v32i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgeq_v4i1_v32i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpleq %ymm0, %ymm1, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgeq_v4i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1290:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1291:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1292:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
; NoVLX-NEXT: kshiftrw $15, %k3, %k3
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k3, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
%1 = bitcast <4 x i64> %__b to <4 x i64>
%2 = icmp sge <4 x i64> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%4 = and <4 x i1> %2, %extract.i
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
define zeroext i32 @test_masked_vpcmpsgeq_v4i1_v32i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgeq_v4i1_v32i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpnltq (%rsi), %ymm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgeq_v4i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1293:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1294:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1295:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
; NoVLX-NEXT: kshiftrw $15, %k3, %k3
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k3, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
%load = load <4 x i64>, <4 x i64>* %__b
%1 = bitcast <4 x i64> %load to <4 x i64>
%2 = icmp sge <4 x i64> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%4 = and <4 x i1> %2, %extract.i
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
define zeroext i32 @test_vpcmpsgeq_v4i1_v32i1_mask_mem_b(<4 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgeq_v4i1_v32i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpbroadcastq (%rdi), %ymm1
; VLX-NEXT: vpcmpleq %ymm0, %ymm1, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgeq_v4i1_v32i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1296:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1297:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1298:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1
; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
%load = load i64, i64* %__b
%vec = insertelement <4 x i64> undef, i64 %load, i32 0
%1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
%2 = icmp sge <4 x i64> %0, %1
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
define zeroext i32 @test_masked_vpcmpsgeq_v4i1_v32i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgeq_v4i1_v32i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpbroadcastq (%rsi), %ymm1
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpleq %ymm0, %ymm1, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgeq_v4i1_v32i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1299:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1300:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1301:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1
; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
; NoVLX-NEXT: kshiftrw $15, %k3, %k3
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k3, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
%load = load i64, i64* %__b
%vec = insertelement <4 x i64> undef, i64 %load, i32 0
%1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
%2 = icmp sge <4 x i64> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%4 = and <4 x i1> %extract.i, %2
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
define zeroext i64 @test_vpcmpsgeq_v4i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgeq_v4i1_v64i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpleq %ymm0, %ymm1, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgeq_v4i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1302:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1303:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1304:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
%1 = bitcast <4 x i64> %__b to <4 x i64>
%2 = icmp sge <4 x i64> %0, %1
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
define zeroext i64 @test_vpcmpsgeq_v4i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgeq_v4i1_v64i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpnltq (%rdi), %ymm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgeq_v4i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1305:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1306:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1307:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
%load = load <4 x i64>, <4 x i64>* %__b
%1 = bitcast <4 x i64> %load to <4 x i64>
%2 = icmp sge <4 x i64> %0, %1
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
define zeroext i64 @test_masked_vpcmpsgeq_v4i1_v64i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgeq_v4i1_v64i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpleq %ymm0, %ymm1, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgeq_v4i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1308:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1309:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1310:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
; NoVLX-NEXT: kshiftrw $15, %k3, %k3
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k3, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
%1 = bitcast <4 x i64> %__b to <4 x i64>
%2 = icmp sge <4 x i64> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%4 = and <4 x i1> %2, %extract.i
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
define zeroext i64 @test_masked_vpcmpsgeq_v4i1_v64i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgeq_v4i1_v64i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpnltq (%rsi), %ymm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgeq_v4i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1311:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1312:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1313:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
; NoVLX-NEXT: kshiftrw $15, %k3, %k3
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k3, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
%load = load <4 x i64>, <4 x i64>* %__b
%1 = bitcast <4 x i64> %load to <4 x i64>
%2 = icmp sge <4 x i64> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%4 = and <4 x i1> %2, %extract.i
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
define zeroext i64 @test_vpcmpsgeq_v4i1_v64i1_mask_mem_b(<4 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgeq_v4i1_v64i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpbroadcastq (%rdi), %ymm1
; VLX-NEXT: vpcmpleq %ymm0, %ymm1, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgeq_v4i1_v64i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1314:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1315:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1316:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1
; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
%load = load i64, i64* %__b
%vec = insertelement <4 x i64> undef, i64 %load, i32 0
%1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
%2 = icmp sge <4 x i64> %0, %1
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
define zeroext i64 @test_masked_vpcmpsgeq_v4i1_v64i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgeq_v4i1_v64i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpbroadcastq (%rsi), %ymm1
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpleq %ymm0, %ymm1, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgeq_v4i1_v64i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1317:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1318:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1319:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1
; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
; NoVLX-NEXT: kshiftrw $15, %k3, %k3
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k3, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
%load = load i64, i64* %__b
%vec = insertelement <4 x i64> undef, i64 %load, i32 0
%1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
%2 = icmp sge <4 x i64> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%4 = and <4 x i1> %extract.i, %2
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
define zeroext i16 @test_vpcmpsgeq_v8i1_v16i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgeq_v8i1_v16i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgeq_v8i1_v16i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
%1 = bitcast <8 x i64> %__b to <8 x i64>
%2 = icmp sge <8 x i64> %0, %1
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%4 = bitcast <16 x i1> %3 to i16
ret i16 %4
}
define zeroext i16 @test_vpcmpsgeq_v8i1_v16i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgeq_v8i1_v16i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpnltq (%rdi), %zmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgeq_v8i1_v16i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpcmpnltq (%rdi), %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
%load = load <8 x i64>, <8 x i64>* %__b
%1 = bitcast <8 x i64> %load to <8 x i64>
%2 = icmp sge <8 x i64> %0, %1
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%4 = bitcast <16 x i1> %3 to i16
ret i16 %4
}
define zeroext i16 @test_masked_vpcmpsgeq_v8i1_v16i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgeq_v8i1_v16i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgeq_v8i1_v16i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 {%k1}
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
%1 = bitcast <8 x i64> %__b to <8 x i64>
%2 = icmp sge <8 x i64> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %2, %3
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%6 = bitcast <16 x i1> %5 to i16
ret i16 %6
}
define zeroext i16 @test_masked_vpcmpsgeq_v8i1_v16i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgeq_v8i1_v16i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpnltq (%rsi), %zmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgeq_v8i1_v16i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpnltq (%rsi), %zmm0, %k0 {%k1}
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
%load = load <8 x i64>, <8 x i64>* %__b
%1 = bitcast <8 x i64> %load to <8 x i64>
%2 = icmp sge <8 x i64> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %2, %3
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%6 = bitcast <16 x i1> %5 to i16
ret i16 %6
}
define zeroext i16 @test_vpcmpsgeq_v8i1_v16i1_mask_mem_b(<8 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgeq_v8i1_v16i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpbroadcastq (%rdi), %zmm1
; VLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgeq_v8i1_v16i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpbroadcastq (%rdi), %zmm1
; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
%load = load i64, i64* %__b
%vec = insertelement <8 x i64> undef, i64 %load, i32 0
%1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
%2 = icmp sge <8 x i64> %0, %1
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%4 = bitcast <16 x i1> %3 to i16
ret i16 %4
}
define zeroext i16 @test_masked_vpcmpsgeq_v8i1_v16i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgeq_v8i1_v16i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpbroadcastq (%rsi), %zmm1
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgeq_v8i1_v16i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpbroadcastq (%rsi), %zmm1
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 {%k1}
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
%load = load i64, i64* %__b
%vec = insertelement <8 x i64> undef, i64 %load, i32 0
%1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
%2 = icmp sge <8 x i64> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %3, %2
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%6 = bitcast <16 x i1> %5 to i16
ret i16 %6
}
define zeroext i32 @test_vpcmpsgeq_v8i1_v32i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgeq_v8i1_v32i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgeq_v8i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1320:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1321:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1322:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
%1 = bitcast <8 x i64> %__b to <8 x i64>
%2 = icmp sge <8 x i64> %0, %1
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
define zeroext i32 @test_vpcmpsgeq_v8i1_v32i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgeq_v8i1_v32i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpnltq (%rdi), %zmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgeq_v8i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1323:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1324:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1325:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpcmpnltq (%rdi), %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
%load = load <8 x i64>, <8 x i64>* %__b
%1 = bitcast <8 x i64> %load to <8 x i64>
%2 = icmp sge <8 x i64> %0, %1
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
define zeroext i32 @test_masked_vpcmpsgeq_v8i1_v32i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgeq_v8i1_v32i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgeq_v8i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1326:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1327:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1328:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
%1 = bitcast <8 x i64> %__b to <8 x i64>
%2 = icmp sge <8 x i64> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %2, %3
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
define zeroext i32 @test_masked_vpcmpsgeq_v8i1_v32i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgeq_v8i1_v32i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpnltq (%rsi), %zmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgeq_v8i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1329:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1330:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1331:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpnltq (%rsi), %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
%load = load <8 x i64>, <8 x i64>* %__b
%1 = bitcast <8 x i64> %load to <8 x i64>
%2 = icmp sge <8 x i64> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %2, %3
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
define zeroext i32 @test_vpcmpsgeq_v8i1_v32i1_mask_mem_b(<8 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgeq_v8i1_v32i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpbroadcastq (%rdi), %zmm1
; VLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgeq_v8i1_v32i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1332:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1333:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1334:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpbroadcastq (%rdi), %zmm1
; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
%load = load i64, i64* %__b
%vec = insertelement <8 x i64> undef, i64 %load, i32 0
%1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
%2 = icmp sge <8 x i64> %0, %1
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
define zeroext i32 @test_masked_vpcmpsgeq_v8i1_v32i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgeq_v8i1_v32i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpbroadcastq (%rsi), %zmm1
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgeq_v8i1_v32i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1335:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1336:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1337:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpbroadcastq (%rsi), %zmm1
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
%load = load i64, i64* %__b
%vec = insertelement <8 x i64> undef, i64 %load, i32 0
%1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
%2 = icmp sge <8 x i64> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %3, %2
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
define zeroext i64 @test_vpcmpsgeq_v8i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgeq_v8i1_v64i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgeq_v8i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1338:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1339:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1340:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
%1 = bitcast <8 x i64> %__b to <8 x i64>
%2 = icmp sge <8 x i64> %0, %1
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
define zeroext i64 @test_vpcmpsgeq_v8i1_v64i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgeq_v8i1_v64i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpnltq (%rdi), %zmm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgeq_v8i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1341:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1342:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1343:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpnltq (%rdi), %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
%load = load <8 x i64>, <8 x i64>* %__b
%1 = bitcast <8 x i64> %load to <8 x i64>
%2 = icmp sge <8 x i64> %0, %1
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
define zeroext i64 @test_masked_vpcmpsgeq_v8i1_v64i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgeq_v8i1_v64i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgeq_v8i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1344:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1345:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1346:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
%1 = bitcast <8 x i64> %__b to <8 x i64>
%2 = icmp sge <8 x i64> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %2, %3
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
define zeroext i64 @test_masked_vpcmpsgeq_v8i1_v64i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgeq_v8i1_v64i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpnltq (%rsi), %zmm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgeq_v8i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1347:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1348:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1349:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpnltq (%rsi), %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
%load = load <8 x i64>, <8 x i64>* %__b
%1 = bitcast <8 x i64> %load to <8 x i64>
%2 = icmp sge <8 x i64> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %2, %3
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
define zeroext i64 @test_vpcmpsgeq_v8i1_v64i1_mask_mem_b(<8 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgeq_v8i1_v64i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpbroadcastq (%rdi), %zmm1
; VLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgeq_v8i1_v64i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1350:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1351:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1352:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpbroadcastq (%rdi), %zmm1
; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
%load = load i64, i64* %__b
%vec = insertelement <8 x i64> undef, i64 %load, i32 0
%1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
%2 = icmp sge <8 x i64> %0, %1
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
define zeroext i64 @test_masked_vpcmpsgeq_v8i1_v64i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgeq_v8i1_v64i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpbroadcastq (%rsi), %zmm1
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgeq_v8i1_v64i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1353:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1354:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1355:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpbroadcastq (%rsi), %zmm1
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
%load = load i64, i64* %__b
%vec = insertelement <8 x i64> undef, i64 %load, i32 0
%1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
%2 = icmp sge <8 x i64> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %3, %2
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
define zeroext i32 @test_vpcmpultb_v16i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpultb_v16i1_v32i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpltub %xmm1, %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpultb_v16i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1356:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1357:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1358:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %r13
; NoVLX-NEXT: pushq %r12
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: .Lcfi1359:
; NoVLX-NEXT: .cfi_offset %rbx, -56
; NoVLX-NEXT: .Lcfi1360:
; NoVLX-NEXT: .cfi_offset %r12, -48
; NoVLX-NEXT: .Lcfi1361:
; NoVLX-NEXT: .cfi_offset %r13, -40
; NoVLX-NEXT: .Lcfi1362:
; NoVLX-NEXT: .cfi_offset %r14, -32
; NoVLX-NEXT: .Lcfi1363:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0
; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: leaq -40(%rbp), %rsp
; NoVLX-NEXT: popq %rbx
; NoVLX-NEXT: popq %r12
; NoVLX-NEXT: popq %r13
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <16 x i8>
%1 = bitcast <2 x i64> %__b to <16 x i8>
%2 = icmp ult <16 x i8> %0, %1
%3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
define zeroext i32 @test_vpcmpultb_v16i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpultb_v16i1_v32i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpltub (%rdi), %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpultb_v16i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1364:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1365:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1366:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %r13
; NoVLX-NEXT: pushq %r12
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: .Lcfi1367:
; NoVLX-NEXT: .cfi_offset %rbx, -56
; NoVLX-NEXT: .Lcfi1368:
; NoVLX-NEXT: .cfi_offset %r12, -48
; NoVLX-NEXT: .Lcfi1369:
; NoVLX-NEXT: .cfi_offset %r13, -40
; NoVLX-NEXT: .Lcfi1370:
; NoVLX-NEXT: .cfi_offset %r14, -32
; NoVLX-NEXT: .Lcfi1371:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpxor (%rdi), %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: leaq -40(%rbp), %rsp
; NoVLX-NEXT: popq %rbx
; NoVLX-NEXT: popq %r12
; NoVLX-NEXT: popq %r13
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <16 x i8>
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <16 x i8>
%2 = icmp ult <16 x i8> %0, %1
%3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
define zeroext i32 @test_masked_vpcmpultb_v16i1_v32i1_mask(i16 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpultb_v16i1_v32i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpltub %xmm1, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpultb_v16i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1372:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1373:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1374:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %r13
; NoVLX-NEXT: pushq %r12
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: .Lcfi1375:
; NoVLX-NEXT: .cfi_offset %rbx, -56
; NoVLX-NEXT: .Lcfi1376:
; NoVLX-NEXT: .cfi_offset %r12, -48
; NoVLX-NEXT: .Lcfi1377:
; NoVLX-NEXT: .cfi_offset %r13, -40
; NoVLX-NEXT: .Lcfi1378:
; NoVLX-NEXT: .cfi_offset %r14, -32
; NoVLX-NEXT: .Lcfi1379:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0
; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: leaq -40(%rbp), %rsp
; NoVLX-NEXT: popq %rbx
; NoVLX-NEXT: popq %r12
; NoVLX-NEXT: popq %r13
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <16 x i8>
%1 = bitcast <2 x i64> %__b to <16 x i8>
%2 = icmp ult <16 x i8> %0, %1
%3 = bitcast i16 %__u to <16 x i1>
%4 = and <16 x i1> %2, %3
%5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
define zeroext i32 @test_masked_vpcmpultb_v16i1_v32i1_mask_mem(i16 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpultb_v16i1_v32i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpltub (%rsi), %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpultb_v16i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1380:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1381:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1382:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %r13
; NoVLX-NEXT: pushq %r12
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: .Lcfi1383:
; NoVLX-NEXT: .cfi_offset %rbx, -56
; NoVLX-NEXT: .Lcfi1384:
; NoVLX-NEXT: .cfi_offset %r12, -48
; NoVLX-NEXT: .Lcfi1385:
; NoVLX-NEXT: .cfi_offset %r13, -40
; NoVLX-NEXT: .Lcfi1386:
; NoVLX-NEXT: .cfi_offset %r14, -32
; NoVLX-NEXT: .Lcfi1387:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpxor (%rsi), %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: leaq -40(%rbp), %rsp
; NoVLX-NEXT: popq %rbx
; NoVLX-NEXT: popq %r12
; NoVLX-NEXT: popq %r13
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <16 x i8>
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <16 x i8>
%2 = icmp ult <16 x i8> %0, %1
%3 = bitcast i16 %__u to <16 x i1>
%4 = and <16 x i1> %2, %3
%5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
define zeroext i64 @test_vpcmpultb_v16i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpultb_v16i1_v64i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpltub %xmm1, %xmm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpultb_v16i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1388:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1389:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1390:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %r13
; NoVLX-NEXT: pushq %r12
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: .Lcfi1391:
; NoVLX-NEXT: .cfi_offset %rbx, -56
; NoVLX-NEXT: .Lcfi1392:
; NoVLX-NEXT: .cfi_offset %r12, -48
; NoVLX-NEXT: .Lcfi1393:
; NoVLX-NEXT: .cfi_offset %r13, -40
; NoVLX-NEXT: .Lcfi1394:
; NoVLX-NEXT: .cfi_offset %r14, -32
; NoVLX-NEXT: .Lcfi1395:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0
; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: leaq -40(%rbp), %rsp
; NoVLX-NEXT: popq %rbx
; NoVLX-NEXT: popq %r12
; NoVLX-NEXT: popq %r13
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <16 x i8>
%1 = bitcast <2 x i64> %__b to <16 x i8>
%2 = icmp ult <16 x i8> %0, %1
%3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
define zeroext i64 @test_vpcmpultb_v16i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpultb_v16i1_v64i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpltub (%rdi), %xmm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpultb_v16i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1396:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1397:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1398:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %r13
; NoVLX-NEXT: pushq %r12
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: .Lcfi1399:
; NoVLX-NEXT: .cfi_offset %rbx, -56
; NoVLX-NEXT: .Lcfi1400:
; NoVLX-NEXT: .cfi_offset %r12, -48
; NoVLX-NEXT: .Lcfi1401:
; NoVLX-NEXT: .cfi_offset %r13, -40
; NoVLX-NEXT: .Lcfi1402:
; NoVLX-NEXT: .cfi_offset %r14, -32
; NoVLX-NEXT: .Lcfi1403:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpxor (%rdi), %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: leaq -40(%rbp), %rsp
; NoVLX-NEXT: popq %rbx
; NoVLX-NEXT: popq %r12
; NoVLX-NEXT: popq %r13
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <16 x i8>
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <16 x i8>
%2 = icmp ult <16 x i8> %0, %1
%3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
define zeroext i64 @test_masked_vpcmpultb_v16i1_v64i1_mask(i16 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpultb_v16i1_v64i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpltub %xmm1, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpultb_v16i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1404:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1405:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1406:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %r13
; NoVLX-NEXT: pushq %r12
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: .Lcfi1407:
; NoVLX-NEXT: .cfi_offset %rbx, -56
; NoVLX-NEXT: .Lcfi1408:
; NoVLX-NEXT: .cfi_offset %r12, -48
; NoVLX-NEXT: .Lcfi1409:
; NoVLX-NEXT: .cfi_offset %r13, -40
; NoVLX-NEXT: .Lcfi1410:
; NoVLX-NEXT: .cfi_offset %r14, -32
; NoVLX-NEXT: .Lcfi1411:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0
; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: leaq -40(%rbp), %rsp
; NoVLX-NEXT: popq %rbx
; NoVLX-NEXT: popq %r12
; NoVLX-NEXT: popq %r13
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <16 x i8>
%1 = bitcast <2 x i64> %__b to <16 x i8>
%2 = icmp ult <16 x i8> %0, %1
%3 = bitcast i16 %__u to <16 x i1>
%4 = and <16 x i1> %2, %3
%5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
define zeroext i64 @test_masked_vpcmpultb_v16i1_v64i1_mask_mem(i16 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpultb_v16i1_v64i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpltub (%rsi), %xmm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpultb_v16i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1412:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1413:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1414:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %r13
; NoVLX-NEXT: pushq %r12
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: .Lcfi1415:
; NoVLX-NEXT: .cfi_offset %rbx, -56
; NoVLX-NEXT: .Lcfi1416:
; NoVLX-NEXT: .cfi_offset %r12, -48
; NoVLX-NEXT: .Lcfi1417:
; NoVLX-NEXT: .cfi_offset %r13, -40
; NoVLX-NEXT: .Lcfi1418:
; NoVLX-NEXT: .cfi_offset %r14, -32
; NoVLX-NEXT: .Lcfi1419:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpxor (%rsi), %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: leaq -40(%rbp), %rsp
; NoVLX-NEXT: popq %rbx
; NoVLX-NEXT: popq %r12
; NoVLX-NEXT: popq %r13
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <16 x i8>
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <16 x i8>
%2 = icmp ult <16 x i8> %0, %1
%3 = bitcast i16 %__u to <16 x i1>
%4 = and <16 x i1> %2, %3
%5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
define zeroext i64 @test_vpcmpultb_v32i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpultb_v32i1_v64i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpltub %ymm1, %ymm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpultb_v32i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1420:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1421:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1422:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vmovdqa {{.*#+}} ymm2 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0
; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1
; NoVLX-NEXT: vpcmpgtb %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %ecx
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax
; NoVLX-NEXT: shlq $32, %rax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <32 x i8>
%1 = bitcast <4 x i64> %__b to <32 x i8>
%2 = icmp ult <32 x i8> %0, %1
%3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
define zeroext i64 @test_vpcmpultb_v32i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpultb_v32i1_v64i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpltub (%rdi), %ymm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpultb_v32i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1423:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1424:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1425:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vmovdqa {{.*#+}} ymm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpxor (%rdi), %ymm1, %ymm1
; NoVLX-NEXT: vpcmpgtb %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %ecx
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax
; NoVLX-NEXT: shlq $32, %rax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <32 x i8>
%load = load <4 x i64>, <4 x i64>* %__b
%1 = bitcast <4 x i64> %load to <32 x i8>
%2 = icmp ult <32 x i8> %0, %1
%3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
define zeroext i64 @test_masked_vpcmpultb_v32i1_v64i1_mask(i32 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpultb_v32i1_v64i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpltub %ymm1, %ymm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpultb_v32i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1426:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1427:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1428:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $96, %rsp
; NoVLX-NEXT: movl %edi, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k1
; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k2
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vpmovdb %zmm2, %xmm2
; NoVLX-NEXT: vpternlogd $255, %zmm3, %zmm3, %zmm3 {%k2} {z}
; NoVLX-NEXT: vpmovdb %zmm3, %xmm3
; NoVLX-NEXT: vmovdqa {{.*#+}} ymm4 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; NoVLX-NEXT: vpxor %ymm4, %ymm0, %ymm0
; NoVLX-NEXT: vpxor %ymm4, %ymm1, %ymm1
; NoVLX-NEXT: vpcmpgtb %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpand %xmm3, %xmm1, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpand %xmm2, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %ecx
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax
; NoVLX-NEXT: shlq $32, %rax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <32 x i8>
%1 = bitcast <4 x i64> %__b to <32 x i8>
%2 = icmp ult <32 x i8> %0, %1
%3 = bitcast i32 %__u to <32 x i1>
%4 = and <32 x i1> %2, %3
%5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
define zeroext i64 @test_masked_vpcmpultb_v32i1_v64i1_mask_mem(i32 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpultb_v32i1_v64i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpltub (%rsi), %ymm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpultb_v32i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1429:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1430:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1431:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $96, %rsp
; NoVLX-NEXT: movl %edi, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k1
; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k2
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpmovdb %zmm1, %xmm1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k2} {z}
; NoVLX-NEXT: vpmovdb %zmm2, %xmm2
; NoVLX-NEXT: vmovdqa {{.*#+}} ymm3 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; NoVLX-NEXT: vpxor %ymm3, %ymm0, %ymm0
; NoVLX-NEXT: vpxor (%rsi), %ymm3, %ymm3
; NoVLX-NEXT: vpcmpgtb %ymm0, %ymm3, %ymm0
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm3
; NoVLX-NEXT: vpand %xmm2, %xmm3, %xmm2
; NoVLX-NEXT: vpmovsxbd %xmm2, %zmm2
; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2
; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %ecx
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax
; NoVLX-NEXT: shlq $32, %rax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <32 x i8>
%load = load <4 x i64>, <4 x i64>* %__b
%1 = bitcast <4 x i64> %load to <32 x i8>
%2 = icmp ult <32 x i8> %0, %1
%3 = bitcast i32 %__u to <32 x i1>
%4 = and <32 x i1> %2, %3
%5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
define zeroext i16 @test_vpcmpultw_v8i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpultw_v8i1_v16i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpltuw %xmm1, %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpultw_v8i1_v16i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [32768,32768,32768,32768,32768,32768,32768,32768]
; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0
; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kunpckbw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <8 x i16>
%1 = bitcast <2 x i64> %__b to <8 x i16>
%2 = icmp ult <8 x i16> %0, %1
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%4 = bitcast <16 x i1> %3 to i16
ret i16 %4
}
define zeroext i16 @test_vpcmpultw_v8i1_v16i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpultw_v8i1_v16i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpltuw (%rdi), %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpultw_v8i1_v16i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpxor (%rdi), %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kunpckbw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <8 x i16>
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <8 x i16>
%2 = icmp ult <8 x i16> %0, %1
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%4 = bitcast <16 x i1> %3 to i16
ret i16 %4
}
define zeroext i16 @test_masked_vpcmpultw_v8i1_v16i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpultw_v8i1_v16i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpltuw %xmm1, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpultw_v8i1_v16i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [32768,32768,32768,32768,32768,32768,32768,32768]
; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0
; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kunpckbw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <8 x i16>
%1 = bitcast <2 x i64> %__b to <8 x i16>
%2 = icmp ult <8 x i16> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %2, %3
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%6 = bitcast <16 x i1> %5 to i16
ret i16 %6
}
define zeroext i16 @test_masked_vpcmpultw_v8i1_v16i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpultw_v8i1_v16i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpltuw (%rsi), %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpultw_v8i1_v16i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpxor (%rsi), %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kunpckbw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <8 x i16>
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <8 x i16>
%2 = icmp ult <8 x i16> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %2, %3
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%6 = bitcast <16 x i1> %5 to i16
ret i16 %6
}
define zeroext i32 @test_vpcmpultw_v8i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpultw_v8i1_v32i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpltuw %xmm1, %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpultw_v8i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1432:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1433:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1434:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [32768,32768,32768,32768,32768,32768,32768,32768]
; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0
; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <8 x i16>
%1 = bitcast <2 x i64> %__b to <8 x i16>
%2 = icmp ult <8 x i16> %0, %1
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
define zeroext i32 @test_vpcmpultw_v8i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpultw_v8i1_v32i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpltuw (%rdi), %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpultw_v8i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1435:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1436:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1437:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpxor (%rdi), %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <8 x i16>
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <8 x i16>
%2 = icmp ult <8 x i16> %0, %1
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
define zeroext i32 @test_masked_vpcmpultw_v8i1_v32i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpultw_v8i1_v32i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpltuw %xmm1, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpultw_v8i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1438:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1439:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1440:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [32768,32768,32768,32768,32768,32768,32768,32768]
; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0
; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <8 x i16>
%1 = bitcast <2 x i64> %__b to <8 x i16>
%2 = icmp ult <8 x i16> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %2, %3
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
define zeroext i32 @test_masked_vpcmpultw_v8i1_v32i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpultw_v8i1_v32i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpltuw (%rsi), %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpultw_v8i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1441:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1442:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1443:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpxor (%rsi), %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <8 x i16>
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <8 x i16>
%2 = icmp ult <8 x i16> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %2, %3
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
define zeroext i64 @test_vpcmpultw_v8i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpultw_v8i1_v64i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpltuw %xmm1, %xmm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpultw_v8i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1444:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1445:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1446:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [32768,32768,32768,32768,32768,32768,32768,32768]
; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0
; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <8 x i16>
%1 = bitcast <2 x i64> %__b to <8 x i16>
%2 = icmp ult <8 x i16> %0, %1
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
define zeroext i64 @test_vpcmpultw_v8i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpultw_v8i1_v64i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpltuw (%rdi), %xmm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpultw_v8i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1447:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1448:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1449:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpxor (%rdi), %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <8 x i16>
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <8 x i16>
%2 = icmp ult <8 x i16> %0, %1
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
define zeroext i64 @test_masked_vpcmpultw_v8i1_v64i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpultw_v8i1_v64i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpltuw %xmm1, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpultw_v8i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1450:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1451:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1452:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [32768,32768,32768,32768,32768,32768,32768,32768]
; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0
; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <8 x i16>
%1 = bitcast <2 x i64> %__b to <8 x i16>
%2 = icmp ult <8 x i16> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %2, %3
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
define zeroext i64 @test_masked_vpcmpultw_v8i1_v64i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpultw_v8i1_v64i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpltuw (%rsi), %xmm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpultw_v8i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1453:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1454:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1455:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpxor (%rsi), %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <8 x i16>
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <8 x i16>
%2 = icmp ult <8 x i16> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %2, %3
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
define zeroext i32 @test_vpcmpultw_v16i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpultw_v16i1_v32i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpltuw %ymm1, %ymm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpultw_v16i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1456:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1457:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1458:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %r13
; NoVLX-NEXT: pushq %r12
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: .Lcfi1459:
; NoVLX-NEXT: .cfi_offset %rbx, -56
; NoVLX-NEXT: .Lcfi1460:
; NoVLX-NEXT: .cfi_offset %r12, -48
; NoVLX-NEXT: .Lcfi1461:
; NoVLX-NEXT: .cfi_offset %r13, -40
; NoVLX-NEXT: .Lcfi1462:
; NoVLX-NEXT: .cfi_offset %r14, -32
; NoVLX-NEXT: .Lcfi1463:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vmovdqa {{.*#+}} ymm2 = [32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768]
; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0
; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1
; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: leaq -40(%rbp), %rsp
; NoVLX-NEXT: popq %rbx
; NoVLX-NEXT: popq %r12
; NoVLX-NEXT: popq %r13
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <16 x i16>
%1 = bitcast <4 x i64> %__b to <16 x i16>
%2 = icmp ult <16 x i16> %0, %1
%3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
define zeroext i32 @test_vpcmpultw_v16i1_v32i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpultw_v16i1_v32i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpltuw (%rdi), %ymm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpultw_v16i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1464:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1465:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1466:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %r13
; NoVLX-NEXT: pushq %r12
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: .Lcfi1467:
; NoVLX-NEXT: .cfi_offset %rbx, -56
; NoVLX-NEXT: .Lcfi1468:
; NoVLX-NEXT: .cfi_offset %r12, -48
; NoVLX-NEXT: .Lcfi1469:
; NoVLX-NEXT: .cfi_offset %r13, -40
; NoVLX-NEXT: .Lcfi1470:
; NoVLX-NEXT: .cfi_offset %r14, -32
; NoVLX-NEXT: .Lcfi1471:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vmovdqa {{.*#+}} ymm1 = [32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768]
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpxor (%rdi), %ymm1, %ymm1
; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: leaq -40(%rbp), %rsp
; NoVLX-NEXT: popq %rbx
; NoVLX-NEXT: popq %r12
; NoVLX-NEXT: popq %r13
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <16 x i16>
%load = load <4 x i64>, <4 x i64>* %__b
%1 = bitcast <4 x i64> %load to <16 x i16>
%2 = icmp ult <16 x i16> %0, %1
%3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
define zeroext i32 @test_masked_vpcmpultw_v16i1_v32i1_mask(i16 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpultw_v16i1_v32i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpltuw %ymm1, %ymm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpultw_v16i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1472:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1473:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1474:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %r13
; NoVLX-NEXT: pushq %r12
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: .Lcfi1475:
; NoVLX-NEXT: .cfi_offset %rbx, -56
; NoVLX-NEXT: .Lcfi1476:
; NoVLX-NEXT: .cfi_offset %r12, -48
; NoVLX-NEXT: .Lcfi1477:
; NoVLX-NEXT: .cfi_offset %r13, -40
; NoVLX-NEXT: .Lcfi1478:
; NoVLX-NEXT: .cfi_offset %r14, -32
; NoVLX-NEXT: .Lcfi1479:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vmovdqa {{.*#+}} ymm2 = [32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768]
; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0
; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1
; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: leaq -40(%rbp), %rsp
; NoVLX-NEXT: popq %rbx
; NoVLX-NEXT: popq %r12
; NoVLX-NEXT: popq %r13
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <16 x i16>
%1 = bitcast <4 x i64> %__b to <16 x i16>
%2 = icmp ult <16 x i16> %0, %1
%3 = bitcast i16 %__u to <16 x i1>
%4 = and <16 x i1> %2, %3
%5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
define zeroext i32 @test_masked_vpcmpultw_v16i1_v32i1_mask_mem(i16 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpultw_v16i1_v32i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpltuw (%rsi), %ymm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpultw_v16i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1480:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1481:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1482:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %r13
; NoVLX-NEXT: pushq %r12
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: .Lcfi1483:
; NoVLX-NEXT: .cfi_offset %rbx, -56
; NoVLX-NEXT: .Lcfi1484:
; NoVLX-NEXT: .cfi_offset %r12, -48
; NoVLX-NEXT: .Lcfi1485:
; NoVLX-NEXT: .cfi_offset %r13, -40
; NoVLX-NEXT: .Lcfi1486:
; NoVLX-NEXT: .cfi_offset %r14, -32
; NoVLX-NEXT: .Lcfi1487:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vmovdqa {{.*#+}} ymm1 = [32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768]
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpxor (%rsi), %ymm1, %ymm1
; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: leaq -40(%rbp), %rsp
; NoVLX-NEXT: popq %rbx
; NoVLX-NEXT: popq %r12
; NoVLX-NEXT: popq %r13
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <16 x i16>
%load = load <4 x i64>, <4 x i64>* %__b
%1 = bitcast <4 x i64> %load to <16 x i16>
%2 = icmp ult <16 x i16> %0, %1
%3 = bitcast i16 %__u to <16 x i1>
%4 = and <16 x i1> %2, %3
%5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
define zeroext i64 @test_vpcmpultw_v16i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpultw_v16i1_v64i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpltuw %ymm1, %ymm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpultw_v16i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1488:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1489:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1490:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %r13
; NoVLX-NEXT: pushq %r12
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: .Lcfi1491:
; NoVLX-NEXT: .cfi_offset %rbx, -56
; NoVLX-NEXT: .Lcfi1492:
; NoVLX-NEXT: .cfi_offset %r12, -48
; NoVLX-NEXT: .Lcfi1493:
; NoVLX-NEXT: .cfi_offset %r13, -40
; NoVLX-NEXT: .Lcfi1494:
; NoVLX-NEXT: .cfi_offset %r14, -32
; NoVLX-NEXT: .Lcfi1495:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vmovdqa {{.*#+}} ymm2 = [32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768]
; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0
; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1
; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: leaq -40(%rbp), %rsp
; NoVLX-NEXT: popq %rbx
; NoVLX-NEXT: popq %r12
; NoVLX-NEXT: popq %r13
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <16 x i16>
%1 = bitcast <4 x i64> %__b to <16 x i16>
%2 = icmp ult <16 x i16> %0, %1
%3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
define zeroext i64 @test_vpcmpultw_v16i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpultw_v16i1_v64i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpltuw (%rdi), %ymm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpultw_v16i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1496:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1497:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1498:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %r13
; NoVLX-NEXT: pushq %r12
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: .Lcfi1499:
; NoVLX-NEXT: .cfi_offset %rbx, -56
; NoVLX-NEXT: .Lcfi1500:
; NoVLX-NEXT: .cfi_offset %r12, -48
; NoVLX-NEXT: .Lcfi1501:
; NoVLX-NEXT: .cfi_offset %r13, -40
; NoVLX-NEXT: .Lcfi1502:
; NoVLX-NEXT: .cfi_offset %r14, -32
; NoVLX-NEXT: .Lcfi1503:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vmovdqa {{.*#+}} ymm1 = [32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768]
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpxor (%rdi), %ymm1, %ymm1
; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: leaq -40(%rbp), %rsp
; NoVLX-NEXT: popq %rbx
; NoVLX-NEXT: popq %r12
; NoVLX-NEXT: popq %r13
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <16 x i16>
%load = load <4 x i64>, <4 x i64>* %__b
%1 = bitcast <4 x i64> %load to <16 x i16>
%2 = icmp ult <16 x i16> %0, %1
%3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
define zeroext i64 @test_masked_vpcmpultw_v16i1_v64i1_mask(i16 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpultw_v16i1_v64i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpltuw %ymm1, %ymm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpultw_v16i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1504:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1505:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1506:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %r13
; NoVLX-NEXT: pushq %r12
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: .Lcfi1507:
; NoVLX-NEXT: .cfi_offset %rbx, -56
; NoVLX-NEXT: .Lcfi1508:
; NoVLX-NEXT: .cfi_offset %r12, -48
; NoVLX-NEXT: .Lcfi1509:
; NoVLX-NEXT: .cfi_offset %r13, -40
; NoVLX-NEXT: .Lcfi1510:
; NoVLX-NEXT: .cfi_offset %r14, -32
; NoVLX-NEXT: .Lcfi1511:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vmovdqa {{.*#+}} ymm2 = [32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768]
; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0
; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1
; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: leaq -40(%rbp), %rsp
; NoVLX-NEXT: popq %rbx
; NoVLX-NEXT: popq %r12
; NoVLX-NEXT: popq %r13
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <16 x i16>
%1 = bitcast <4 x i64> %__b to <16 x i16>
%2 = icmp ult <16 x i16> %0, %1
%3 = bitcast i16 %__u to <16 x i1>
%4 = and <16 x i1> %2, %3
%5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
define zeroext i64 @test_masked_vpcmpultw_v16i1_v64i1_mask_mem(i16 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpultw_v16i1_v64i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpltuw (%rsi), %ymm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpultw_v16i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1512:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1513:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1514:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %r13
; NoVLX-NEXT: pushq %r12
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: .Lcfi1515:
; NoVLX-NEXT: .cfi_offset %rbx, -56
; NoVLX-NEXT: .Lcfi1516:
; NoVLX-NEXT: .cfi_offset %r12, -48
; NoVLX-NEXT: .Lcfi1517:
; NoVLX-NEXT: .cfi_offset %r13, -40
; NoVLX-NEXT: .Lcfi1518:
; NoVLX-NEXT: .cfi_offset %r14, -32
; NoVLX-NEXT: .Lcfi1519:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vmovdqa {{.*#+}} ymm1 = [32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768]
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpxor (%rsi), %ymm1, %ymm1
; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: leaq -40(%rbp), %rsp
; NoVLX-NEXT: popq %rbx
; NoVLX-NEXT: popq %r12
; NoVLX-NEXT: popq %r13
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <16 x i16>
%load = load <4 x i64>, <4 x i64>* %__b
%1 = bitcast <4 x i64> %load to <16 x i16>
%2 = icmp ult <16 x i16> %0, %1
%3 = bitcast i16 %__u to <16 x i1>
%4 = and <16 x i1> %2, %3
%5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
define zeroext i64 @test_vpcmpultw_v32i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpultw_v32i1_v64i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpltuw %zmm1, %zmm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpultw_v32i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1520:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1521:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1522:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm3
; NoVLX-NEXT: vmovq %xmm3, %rax
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: movq %rax, %rdx
; NoVLX-NEXT: vmovd %eax, %xmm2
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm5
; NoVLX-NEXT: vextracti32x4 $2, %zmm1, %xmm8
; NoVLX-NEXT: vextracti32x4 $3, %zmm1, %xmm4
; NoVLX-NEXT: vextracti128 $1, %ymm1, %xmm6
; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm7
; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm2
; NoVLX-NEXT: shrq $32, %rdx
; NoVLX-NEXT: vpinsrw $2, %edx, %xmm5, %xmm5
; NoVLX-NEXT: vpextrq $1, %xmm3, %rax
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm3
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: vmovq %xmm0, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm3
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vmovd %ecx, %xmm5
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm5, %xmm5
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm5, %xmm5
; NoVLX-NEXT: vpextrq $1, %xmm0, %rax
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm0
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: vmovq %xmm2, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vmovd %ecx, %xmm5
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm5, %xmm5
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm5, %xmm5
; NoVLX-NEXT: vpextrq $1, %xmm2, %rax
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm2
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: vmovq %xmm7, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm5
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vmovd %ecx, %xmm2
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
; NoVLX-NEXT: vpextrq $1, %xmm7, %rax
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: vmovq %xmm6, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm7
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vmovd %ecx, %xmm2
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
; NoVLX-NEXT: vpextrq $1, %xmm6, %rax
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: vmovq %xmm1, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm6
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vmovd %ecx, %xmm2
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
; NoVLX-NEXT: vpextrq $1, %xmm1, %rax
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm1
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1
; NoVLX-NEXT: vmovq %xmm4, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vmovd %ecx, %xmm2
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
; NoVLX-NEXT: vpextrq $1, %xmm4, %rax
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: vmovq %xmm8, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vmovd %ecx, %xmm4
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4
; NoVLX-NEXT: vpextrq $1, %xmm8, %rax
; NoVLX-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0
; NoVLX-NEXT: vinserti128 $1, %xmm5, %ymm7, %ymm3
; NoVLX-NEXT: vinserti128 $1, %xmm6, %ymm1, %ymm1
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm4
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm4, %xmm4
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm4, %xmm4
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm4, %xmm4
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm4, %xmm4
; NoVLX-NEXT: vinserti128 $1, %xmm2, %ymm4, %ymm4
; NoVLX-NEXT: vmovdqa {{.*#+}} ymm2 = [32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768]
; NoVLX-NEXT: vpxor %ymm2, %ymm3, %ymm3
; NoVLX-NEXT: vpxor %ymm2, %ymm4, %ymm4
; NoVLX-NEXT: vpcmpgtw %ymm3, %ymm4, %ymm3
; NoVLX-NEXT: vpmovsxwd %ymm3, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm3
; NoVLX-NEXT: vptestmd %zmm3, %zmm3, %k0
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm3
; NoVLX-NEXT: vpinsrb $1, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $2, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $3, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $6, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $9, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $10, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $13, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $14, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm3, %xmm3
; NoVLX-NEXT: vpmovsxbd %xmm3, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm3
; NoVLX-NEXT: vptestmd %zmm3, %zmm3, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0
; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1
; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm0
; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %ecx
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax
; NoVLX-NEXT: shlq $32, %rax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <32 x i16>
%1 = bitcast <8 x i64> %__b to <32 x i16>
%2 = icmp ult <32 x i16> %0, %1
%3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
define zeroext i64 @test_vpcmpultw_v32i1_v64i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpultw_v32i1_v64i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpltuw (%rdi), %zmm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpultw_v32i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1523:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1524:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1525:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm2
; NoVLX-NEXT: vmovq %xmm2, %rax
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: movq %rax, %rdx
; NoVLX-NEXT: vmovd %eax, %xmm1
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm1, %xmm3
; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm1
; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm4
; NoVLX-NEXT: shrq $32, %rdx
; NoVLX-NEXT: vpinsrw $2, %edx, %xmm3, %xmm3
; NoVLX-NEXT: vpextrq $1, %xmm2, %rax
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm2
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: vmovq %xmm0, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vmovd %ecx, %xmm3
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
; NoVLX-NEXT: vpextrq $1, %xmm0, %rax
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm0
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: vmovq %xmm4, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vmovd %ecx, %xmm3
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
; NoVLX-NEXT: vpextrq $1, %xmm4, %rax
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: vmovq %xmm1, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm3
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vmovd %ecx, %xmm4
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4
; NoVLX-NEXT: vpextrq $1, %xmm1, %rax
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm1
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vinserti128 $1, %xmm3, %ymm1, %ymm2
; NoVLX-NEXT: vmovdqa {{.*#+}} ymm1 = [32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768]
; NoVLX-NEXT: vpxor %ymm1, %ymm2, %ymm2
; NoVLX-NEXT: vpxor 32(%rdi), %ymm1, %ymm3
; NoVLX-NEXT: vpcmpgtw %ymm2, %ymm3, %ymm2
; NoVLX-NEXT: vpmovsxwd %ymm2, %zmm2
; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2
; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %eax, %xmm2
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm2, %xmm2
; NoVLX-NEXT: vpmovsxbd %xmm2, %zmm2
; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2
; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpxor (%rdi), %ymm1, %ymm1
; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm0
; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %ecx
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax
; NoVLX-NEXT: shlq $32, %rax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <32 x i16>
%load = load <8 x i64>, <8 x i64>* %__b
%1 = bitcast <8 x i64> %load to <32 x i16>
%2 = icmp ult <32 x i16> %0, %1
%3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
define zeroext i64 @test_masked_vpcmpultw_v32i1_v64i1_mask(i32 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpultw_v32i1_v64i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpltuw %zmm1, %zmm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpultw_v32i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1526:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1527:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1528:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $96, %rsp
; NoVLX-NEXT: movl %edi, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm2
; NoVLX-NEXT: vmovq %xmm2, %rax
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: movq %rax, %rdx
; NoVLX-NEXT: vmovd %eax, %xmm3
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm4
; NoVLX-NEXT: vextracti128 $1, %ymm1, %xmm8
; NoVLX-NEXT: vextracti32x4 $2, %zmm1, %xmm5
; NoVLX-NEXT: vextracti32x4 $3, %zmm1, %xmm7
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm6
; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm3
; NoVLX-NEXT: shrq $32, %rdx
; NoVLX-NEXT: vpinsrw $2, %edx, %xmm4, %xmm4
; NoVLX-NEXT: vpextrq $1, %xmm2, %rax
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm2
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: vmovq %xmm3, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm9
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vmovd %ecx, %xmm4
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4
; NoVLX-NEXT: vpextrq $1, %xmm3, %rax
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm3
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: vmovq %xmm6, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm4
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vmovd %ecx, %xmm3
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
; NoVLX-NEXT: vpextrq $1, %xmm6, %rax
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: vmovq %xmm0, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm6
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vmovd %ecx, %xmm3
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
; NoVLX-NEXT: vpextrq $1, %xmm0, %rax
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm0
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: vmovq %xmm7, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vmovd %ecx, %xmm3
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
; NoVLX-NEXT: vpextrq $1, %xmm7, %rax
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: vmovq %xmm5, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm7
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vmovd %ecx, %xmm3
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
; NoVLX-NEXT: vpextrq $1, %xmm5, %rax
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: vmovq %xmm8, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm3
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vmovd %ecx, %xmm5
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm5, %xmm5
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm5, %xmm5
; NoVLX-NEXT: vpextrq $1, %xmm8, %rax
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm5
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm5, %xmm5
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm5, %xmm5
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm5, %xmm5
; NoVLX-NEXT: vmovq %xmm1, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm5, %xmm5
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vmovd %ecx, %xmm2
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
; NoVLX-NEXT: vpextrq $1, %xmm1, %rax
; NoVLX-NEXT: vinserti128 $1, %xmm9, %ymm4, %ymm8
; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k1
; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k2
; NoVLX-NEXT: vinserti128 $1, %xmm6, %ymm0, %ymm6
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vinserti128 $1, %xmm7, %ymm3, %ymm4
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k2} {z}
; NoVLX-NEXT: vpmovdb %zmm1, %xmm1
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2
; NoVLX-NEXT: vinserti128 $1, %xmm5, %ymm2, %ymm2
; NoVLX-NEXT: vmovdqa {{.*#+}} ymm5 = [32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768]
; NoVLX-NEXT: vpxor %ymm5, %ymm6, %ymm3
; NoVLX-NEXT: vpxor %ymm5, %ymm2, %ymm2
; NoVLX-NEXT: vpcmpgtw %ymm3, %ymm2, %ymm2
; NoVLX-NEXT: vpmovsxwd %ymm2, %zmm2
; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2
; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm2
; NoVLX-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm2, %xmm3
; NoVLX-NEXT: vpxor %ymm5, %ymm8, %ymm2
; NoVLX-NEXT: vpxor %ymm5, %ymm4, %ymm4
; NoVLX-NEXT: vpcmpgtw %ymm2, %ymm4, %ymm2
; NoVLX-NEXT: vpmovsxwd %ymm2, %zmm2
; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2
; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm2
; NoVLX-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm2, %xmm2
; NoVLX-NEXT: vpand %xmm1, %xmm2, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpand %xmm0, %xmm3, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %ecx
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax
; NoVLX-NEXT: shlq $32, %rax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <32 x i16>
%1 = bitcast <8 x i64> %__b to <32 x i16>
%2 = icmp ult <32 x i16> %0, %1
%3 = bitcast i32 %__u to <32 x i1>
%4 = and <32 x i1> %2, %3
%5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
define zeroext i64 @test_masked_vpcmpultw_v32i1_v64i1_mask_mem(i32 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpultw_v32i1_v64i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpltuw (%rsi), %zmm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpultw_v32i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1529:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1530:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1531:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $96, %rsp
; NoVLX-NEXT: movl %edi, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm1
; NoVLX-NEXT: vmovq %xmm1, %rax
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: movq %rax, %rdx
; NoVLX-NEXT: vmovd %eax, %xmm2
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm3
; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm4
; NoVLX-NEXT: shrq $32, %rdx
; NoVLX-NEXT: vpinsrw $2, %edx, %xmm2, %xmm2
; NoVLX-NEXT: vpextrq $1, %xmm1, %rax
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm1
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1
; NoVLX-NEXT: vmovq %xmm4, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm2
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm1, %xmm1
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpextrq $1, %xmm4, %rax
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm1, %xmm1
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1
; NoVLX-NEXT: vmovq %xmm3, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm4
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm1, %xmm1
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpextrq $1, %xmm3, %rax
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm1, %xmm1
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1
; NoVLX-NEXT: vmovq %xmm0, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm5
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm1, %xmm1
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpextrq $1, %xmm0, %rax
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm1, %xmm0
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm6
; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k1
; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k2
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k2} {z}
; NoVLX-NEXT: vpmovdb %zmm1, %xmm1
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vinserti128 $1, %xmm2, %ymm4, %ymm3
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm6, %xmm2
; NoVLX-NEXT: vinserti128 $1, %xmm5, %ymm2, %ymm2
; NoVLX-NEXT: vmovdqa {{.*#+}} ymm4 = [32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768]
; NoVLX-NEXT: vpxor %ymm4, %ymm2, %ymm2
; NoVLX-NEXT: vpxor (%rsi), %ymm4, %ymm5
; NoVLX-NEXT: vpcmpgtw %ymm2, %ymm5, %ymm2
; NoVLX-NEXT: vpmovsxwd %ymm2, %zmm2
; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2
; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %eax, %xmm2
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm2, %xmm2
; NoVLX-NEXT: vpxor %ymm4, %ymm3, %ymm3
; NoVLX-NEXT: vpxor 32(%rsi), %ymm4, %ymm4
; NoVLX-NEXT: vpcmpgtw %ymm3, %ymm4, %ymm3
; NoVLX-NEXT: vpmovsxwd %ymm3, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm3
; NoVLX-NEXT: vptestmd %zmm3, %zmm3, %k0
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm3
; NoVLX-NEXT: vpinsrb $1, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $2, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $3, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $6, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $9, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $10, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $13, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $14, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm3, %xmm3
; NoVLX-NEXT: vpand %xmm1, %xmm3, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpand %xmm0, %xmm2, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %ecx
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax
; NoVLX-NEXT: shlq $32, %rax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <32 x i16>
%load = load <8 x i64>, <8 x i64>* %__b
%1 = bitcast <8 x i64> %load to <32 x i16>
%2 = icmp ult <32 x i16> %0, %1
%3 = bitcast i32 %__u to <32 x i1>
%4 = and <32 x i1> %2, %3
%5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
define zeroext i8 @test_vpcmpultd_v4i1_v8i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpultd_v4i1_v8i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpltud %xmm1, %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpultd_v4i1_v8i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0
; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%1 = bitcast <2 x i64> %__b to <4 x i32>
%2 = icmp ult <4 x i32> %0, %1
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <8 x i1> %3 to i8
ret i8 %4
}
define zeroext i8 @test_vpcmpultd_v4i1_v8i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpultd_v4i1_v8i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpltud (%rdi), %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpultd_v4i1_v8i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [2147483648,2147483648,2147483648,2147483648]
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpxor (%rdi), %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <4 x i32>
%2 = icmp ult <4 x i32> %0, %1
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <8 x i1> %3 to i8
ret i8 %4
}
define zeroext i8 @test_masked_vpcmpultd_v4i1_v8i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpultd_v4i1_v8i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpltud %xmm1, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpultd_v4i1_v8i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0
; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
; NoVLX-NEXT: kshiftrw $15, %k3, %k3
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k3, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%1 = bitcast <2 x i64> %__b to <4 x i32>
%2 = icmp ult <4 x i32> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%4 = and <4 x i1> %2, %extract.i
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <8 x i1> %5 to i8
ret i8 %6
}
define zeroext i8 @test_masked_vpcmpultd_v4i1_v8i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpultd_v4i1_v8i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpltud (%rsi), %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpultd_v4i1_v8i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [2147483648,2147483648,2147483648,2147483648]
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpxor (%rsi), %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
; NoVLX-NEXT: kshiftrw $15, %k3, %k3
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k3, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <4 x i32>
%2 = icmp ult <4 x i32> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%4 = and <4 x i1> %2, %extract.i
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <8 x i1> %5 to i8
ret i8 %6
}
define zeroext i8 @test_vpcmpultd_v4i1_v8i1_mask_mem_b(<2 x i64> %__a, i32* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpultd_v4i1_v8i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpltud (%rdi){1to4}, %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpultd_v4i1_v8i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1
; NoVLX-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0
; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%load = load i32, i32* %__b
%vec = insertelement <4 x i32> undef, i32 %load, i32 0
%1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
%2 = icmp ult <4 x i32> %0, %1
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <8 x i1> %3 to i8
ret i8 %4
}
define zeroext i8 @test_masked_vpcmpultd_v4i1_v8i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i32* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpultd_v4i1_v8i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpltud (%rsi){1to4}, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpultd_v4i1_v8i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1
; NoVLX-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0
; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
; NoVLX-NEXT: kshiftrw $15, %k3, %k3
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k3, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%load = load i32, i32* %__b
%vec = insertelement <4 x i32> undef, i32 %load, i32 0
%1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
%2 = icmp ult <4 x i32> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%4 = and <4 x i1> %extract.i, %2
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <8 x i1> %5 to i8
ret i8 %6
}
define zeroext i16 @test_vpcmpultd_v4i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpultd_v4i1_v16i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpltud %xmm1, %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpultd_v4i1_v16i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0
; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%1 = bitcast <2 x i64> %__b to <4 x i32>
%2 = icmp ult <4 x i32> %0, %1
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <16 x i1> %3 to i16
ret i16 %4
}
define zeroext i16 @test_vpcmpultd_v4i1_v16i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpultd_v4i1_v16i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpltud (%rdi), %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpultd_v4i1_v16i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [2147483648,2147483648,2147483648,2147483648]
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpxor (%rdi), %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <4 x i32>
%2 = icmp ult <4 x i32> %0, %1
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <16 x i1> %3 to i16
ret i16 %4
}
define zeroext i16 @test_masked_vpcmpultd_v4i1_v16i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpultd_v4i1_v16i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpltud %xmm1, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpultd_v4i1_v16i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0
; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
; NoVLX-NEXT: kshiftrw $15, %k3, %k3
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k3, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%1 = bitcast <2 x i64> %__b to <4 x i32>
%2 = icmp ult <4 x i32> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%4 = and <4 x i1> %2, %extract.i
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <16 x i1> %5 to i16
ret i16 %6
}
define zeroext i16 @test_masked_vpcmpultd_v4i1_v16i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpultd_v4i1_v16i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpltud (%rsi), %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpultd_v4i1_v16i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [2147483648,2147483648,2147483648,2147483648]
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpxor (%rsi), %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
; NoVLX-NEXT: kshiftrw $15, %k3, %k3
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k3, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <4 x i32>
%2 = icmp ult <4 x i32> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%4 = and <4 x i1> %2, %extract.i
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <16 x i1> %5 to i16
ret i16 %6
}
define zeroext i16 @test_vpcmpultd_v4i1_v16i1_mask_mem_b(<2 x i64> %__a, i32* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpultd_v4i1_v16i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpltud (%rdi){1to4}, %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpultd_v4i1_v16i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1
; NoVLX-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0
; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%load = load i32, i32* %__b
%vec = insertelement <4 x i32> undef, i32 %load, i32 0
%1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
%2 = icmp ult <4 x i32> %0, %1
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <16 x i1> %3 to i16
ret i16 %4
}
define zeroext i16 @test_masked_vpcmpultd_v4i1_v16i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i32* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpultd_v4i1_v16i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpltud (%rsi){1to4}, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpultd_v4i1_v16i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1
; NoVLX-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0
; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
; NoVLX-NEXT: kshiftrw $15, %k3, %k3
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k3, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%load = load i32, i32* %__b
%vec = insertelement <4 x i32> undef, i32 %load, i32 0
%1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
%2 = icmp ult <4 x i32> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%4 = and <4 x i1> %extract.i, %2
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <16 x i1> %5 to i16
ret i16 %6
}
define zeroext i32 @test_vpcmpultd_v4i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpultd_v4i1_v32i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpltud %xmm1, %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpultd_v4i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1532:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1533:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1534:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0
; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%1 = bitcast <2 x i64> %__b to <4 x i32>
%2 = icmp ult <4 x i32> %0, %1
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
define zeroext i32 @test_vpcmpultd_v4i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpultd_v4i1_v32i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpltud (%rdi), %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpultd_v4i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1535:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1536:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1537:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [2147483648,2147483648,2147483648,2147483648]
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpxor (%rdi), %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <4 x i32>
%2 = icmp ult <4 x i32> %0, %1
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
define zeroext i32 @test_masked_vpcmpultd_v4i1_v32i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpultd_v4i1_v32i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpltud %xmm1, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpultd_v4i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1538:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1539:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1540:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0
; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
; NoVLX-NEXT: kshiftrw $15, %k3, %k3
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k3, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%1 = bitcast <2 x i64> %__b to <4 x i32>
%2 = icmp ult <4 x i32> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%4 = and <4 x i1> %2, %extract.i
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
define zeroext i32 @test_masked_vpcmpultd_v4i1_v32i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpultd_v4i1_v32i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpltud (%rsi), %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpultd_v4i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1541:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1542:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1543:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [2147483648,2147483648,2147483648,2147483648]
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpxor (%rsi), %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
; NoVLX-NEXT: kshiftrw $15, %k3, %k3
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k3, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <4 x i32>
%2 = icmp ult <4 x i32> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%4 = and <4 x i1> %2, %extract.i
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
define zeroext i32 @test_vpcmpultd_v4i1_v32i1_mask_mem_b(<2 x i64> %__a, i32* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpultd_v4i1_v32i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpltud (%rdi){1to4}, %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpultd_v4i1_v32i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1544:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1545:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1546:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1
; NoVLX-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0
; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%load = load i32, i32* %__b
%vec = insertelement <4 x i32> undef, i32 %load, i32 0
%1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
%2 = icmp ult <4 x i32> %0, %1
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
define zeroext i32 @test_masked_vpcmpultd_v4i1_v32i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i32* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpultd_v4i1_v32i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpltud (%rsi){1to4}, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpultd_v4i1_v32i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1547:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1548:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1549:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1
; NoVLX-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0
; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
; NoVLX-NEXT: kshiftrw $15, %k3, %k3
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k3, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%load = load i32, i32* %__b
%vec = insertelement <4 x i32> undef, i32 %load, i32 0
%1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
%2 = icmp ult <4 x i32> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%4 = and <4 x i1> %extract.i, %2
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
define zeroext i64 @test_vpcmpultd_v4i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpultd_v4i1_v64i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpltud %xmm1, %xmm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpultd_v4i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1550:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1551:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1552:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0
; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%1 = bitcast <2 x i64> %__b to <4 x i32>
%2 = icmp ult <4 x i32> %0, %1
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
define zeroext i64 @test_vpcmpultd_v4i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpultd_v4i1_v64i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpltud (%rdi), %xmm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpultd_v4i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1553:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1554:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1555:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [2147483648,2147483648,2147483648,2147483648]
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpxor (%rdi), %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <4 x i32>
%2 = icmp ult <4 x i32> %0, %1
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
define zeroext i64 @test_masked_vpcmpultd_v4i1_v64i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpultd_v4i1_v64i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpltud %xmm1, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpultd_v4i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1556:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1557:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1558:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0
; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
; NoVLX-NEXT: kshiftrw $15, %k3, %k3
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k3, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%1 = bitcast <2 x i64> %__b to <4 x i32>
%2 = icmp ult <4 x i32> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%4 = and <4 x i1> %2, %extract.i
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
define zeroext i64 @test_masked_vpcmpultd_v4i1_v64i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpultd_v4i1_v64i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpltud (%rsi), %xmm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpultd_v4i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1559:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1560:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1561:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [2147483648,2147483648,2147483648,2147483648]
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpxor (%rsi), %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
; NoVLX-NEXT: kshiftrw $15, %k3, %k3
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k3, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <4 x i32>
%2 = icmp ult <4 x i32> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%4 = and <4 x i1> %2, %extract.i
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
define zeroext i64 @test_vpcmpultd_v4i1_v64i1_mask_mem_b(<2 x i64> %__a, i32* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpultd_v4i1_v64i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpltud (%rdi){1to4}, %xmm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpultd_v4i1_v64i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1562:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1563:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1564:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1
; NoVLX-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0
; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%load = load i32, i32* %__b
%vec = insertelement <4 x i32> undef, i32 %load, i32 0
%1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
%2 = icmp ult <4 x i32> %0, %1
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
define zeroext i64 @test_masked_vpcmpultd_v4i1_v64i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i32* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpultd_v4i1_v64i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpltud (%rsi){1to4}, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpultd_v4i1_v64i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1565:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1566:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1567:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1
; NoVLX-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0
; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
; NoVLX-NEXT: kshiftrw $15, %k3, %k3
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k3, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
%load = load i32, i32* %__b
%vec = insertelement <4 x i32> undef, i32 %load, i32 0
%1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
%2 = icmp ult <4 x i32> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%4 = and <4 x i1> %extract.i, %2
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
define zeroext i16 @test_vpcmpultd_v8i1_v16i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpultd_v8i1_v16i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpltud %ymm1, %ymm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpultd_v8i1_v16i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: # kill: %YMM1<def> %YMM1<kill> %ZMM1<def>
; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $8, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
%1 = bitcast <4 x i64> %__b to <8 x i32>
%2 = icmp ult <8 x i32> %0, %1
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%4 = bitcast <16 x i1> %3 to i16
ret i16 %4
}
define zeroext i16 @test_vpcmpultd_v8i1_v16i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpultd_v8i1_v16i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpltud (%rdi), %ymm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpultd_v8i1_v16i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $8, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
%load = load <4 x i64>, <4 x i64>* %__b
%1 = bitcast <4 x i64> %load to <8 x i32>
%2 = icmp ult <8 x i32> %0, %1
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%4 = bitcast <16 x i1> %3 to i16
ret i16 %4
}
define zeroext i16 @test_masked_vpcmpultd_v8i1_v16i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpultd_v8i1_v16i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpltud %ymm1, %ymm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpultd_v8i1_v16i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: # kill: %YMM1<def> %YMM1<kill> %ZMM1<def>
; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $8, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
%1 = bitcast <4 x i64> %__b to <8 x i32>
%2 = icmp ult <8 x i32> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %2, %3
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%6 = bitcast <16 x i1> %5 to i16
ret i16 %6
}
define zeroext i16 @test_masked_vpcmpultd_v8i1_v16i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpultd_v8i1_v16i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpltud (%rsi), %ymm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpultd_v8i1_v16i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $8, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
%load = load <4 x i64>, <4 x i64>* %__b
%1 = bitcast <4 x i64> %load to <8 x i32>
%2 = icmp ult <8 x i32> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %2, %3
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%6 = bitcast <16 x i1> %5 to i16
ret i16 %6
}
define zeroext i16 @test_vpcmpultd_v8i1_v16i1_mask_mem_b(<4 x i64> %__a, i32* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpultd_v8i1_v16i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpltud (%rdi){1to8}, %ymm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpultd_v8i1_v16i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; NoVLX-NEXT: vpbroadcastd (%rdi), %ymm1
; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $8, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
%load = load i32, i32* %__b
%vec = insertelement <8 x i32> undef, i32 %load, i32 0
%1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
%2 = icmp ult <8 x i32> %0, %1
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%4 = bitcast <16 x i1> %3 to i16
ret i16 %4
}
define zeroext i16 @test_masked_vpcmpultd_v8i1_v16i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i32* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpultd_v8i1_v16i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpltud (%rsi){1to8}, %ymm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpultd_v8i1_v16i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; NoVLX-NEXT: vpbroadcastd (%rsi), %ymm1
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $8, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
%load = load i32, i32* %__b
%vec = insertelement <8 x i32> undef, i32 %load, i32 0
%1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
%2 = icmp ult <8 x i32> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %3, %2
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%6 = bitcast <16 x i1> %5 to i16
ret i16 %6
}
define zeroext i32 @test_vpcmpultd_v8i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpultd_v8i1_v32i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpltud %ymm1, %ymm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpultd_v8i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1568:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1569:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1570:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: # kill: %YMM1<def> %YMM1<kill> %ZMM1<def>
; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
%1 = bitcast <4 x i64> %__b to <8 x i32>
%2 = icmp ult <8 x i32> %0, %1
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
define zeroext i32 @test_vpcmpultd_v8i1_v32i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpultd_v8i1_v32i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpltud (%rdi), %ymm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpultd_v8i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1571:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1572:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1573:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
%load = load <4 x i64>, <4 x i64>* %__b
%1 = bitcast <4 x i64> %load to <8 x i32>
%2 = icmp ult <8 x i32> %0, %1
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
define zeroext i32 @test_masked_vpcmpultd_v8i1_v32i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpultd_v8i1_v32i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpltud %ymm1, %ymm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpultd_v8i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1574:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1575:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1576:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: # kill: %YMM1<def> %YMM1<kill> %ZMM1<def>
; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
%1 = bitcast <4 x i64> %__b to <8 x i32>
%2 = icmp ult <8 x i32> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %2, %3
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
define zeroext i32 @test_masked_vpcmpultd_v8i1_v32i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpultd_v8i1_v32i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpltud (%rsi), %ymm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpultd_v8i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1577:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1578:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1579:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
%load = load <4 x i64>, <4 x i64>* %__b
%1 = bitcast <4 x i64> %load to <8 x i32>
%2 = icmp ult <8 x i32> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %2, %3
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
define zeroext i32 @test_vpcmpultd_v8i1_v32i1_mask_mem_b(<4 x i64> %__a, i32* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpultd_v8i1_v32i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpltud (%rdi){1to8}, %ymm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpultd_v8i1_v32i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1580:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1581:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1582:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; NoVLX-NEXT: vpbroadcastd (%rdi), %ymm1
; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
%load = load i32, i32* %__b
%vec = insertelement <8 x i32> undef, i32 %load, i32 0
%1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
%2 = icmp ult <8 x i32> %0, %1
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
define zeroext i32 @test_masked_vpcmpultd_v8i1_v32i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i32* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpultd_v8i1_v32i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpltud (%rsi){1to8}, %ymm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpultd_v8i1_v32i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1583:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1584:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1585:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; NoVLX-NEXT: vpbroadcastd (%rsi), %ymm1
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
%load = load i32, i32* %__b
%vec = insertelement <8 x i32> undef, i32 %load, i32 0
%1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
%2 = icmp ult <8 x i32> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %3, %2
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
define zeroext i64 @test_vpcmpultd_v8i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpultd_v8i1_v64i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpltud %ymm1, %ymm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpultd_v8i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1586:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1587:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1588:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: # kill: %YMM1<def> %YMM1<kill> %ZMM1<def>
; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
%1 = bitcast <4 x i64> %__b to <8 x i32>
%2 = icmp ult <8 x i32> %0, %1
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
define zeroext i64 @test_vpcmpultd_v8i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpultd_v8i1_v64i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpltud (%rdi), %ymm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpultd_v8i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1589:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1590:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1591:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
%load = load <4 x i64>, <4 x i64>* %__b
%1 = bitcast <4 x i64> %load to <8 x i32>
%2 = icmp ult <8 x i32> %0, %1
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
define zeroext i64 @test_masked_vpcmpultd_v8i1_v64i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpultd_v8i1_v64i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpltud %ymm1, %ymm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpultd_v8i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1592:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1593:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1594:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: # kill: %YMM1<def> %YMM1<kill> %ZMM1<def>
; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
%1 = bitcast <4 x i64> %__b to <8 x i32>
%2 = icmp ult <8 x i32> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %2, %3
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
define zeroext i64 @test_masked_vpcmpultd_v8i1_v64i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpultd_v8i1_v64i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpltud (%rsi), %ymm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpultd_v8i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1595:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1596:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1597:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
%load = load <4 x i64>, <4 x i64>* %__b
%1 = bitcast <4 x i64> %load to <8 x i32>
%2 = icmp ult <8 x i32> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %2, %3
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
define zeroext i64 @test_vpcmpultd_v8i1_v64i1_mask_mem_b(<4 x i64> %__a, i32* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpultd_v8i1_v64i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpltud (%rdi){1to8}, %ymm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpultd_v8i1_v64i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1598:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1599:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1600:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; NoVLX-NEXT: vpbroadcastd (%rdi), %ymm1
; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
%load = load i32, i32* %__b
%vec = insertelement <8 x i32> undef, i32 %load, i32 0
%1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
%2 = icmp ult <8 x i32> %0, %1
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
define zeroext i64 @test_masked_vpcmpultd_v8i1_v64i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i32* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpultd_v8i1_v64i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpltud (%rsi){1to8}, %ymm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpultd_v8i1_v64i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1601:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1602:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1603:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; NoVLX-NEXT: vpbroadcastd (%rsi), %ymm1
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
%load = load i32, i32* %__b
%vec = insertelement <8 x i32> undef, i32 %load, i32 0
%1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
%2 = icmp ult <8 x i32> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %3, %2
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
define zeroext i32 @test_vpcmpultd_v16i1_v32i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpultd_v16i1_v32i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpultd_v16i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1604:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1605:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1606:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %r13
; NoVLX-NEXT: pushq %r12
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: .Lcfi1607:
; NoVLX-NEXT: .cfi_offset %rbx, -56
; NoVLX-NEXT: .Lcfi1608:
; NoVLX-NEXT: .cfi_offset %r12, -48
; NoVLX-NEXT: .Lcfi1609:
; NoVLX-NEXT: .cfi_offset %r13, -40
; NoVLX-NEXT: .Lcfi1610:
; NoVLX-NEXT: .cfi_offset %r14, -32
; NoVLX-NEXT: .Lcfi1611:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: leaq -40(%rbp), %rsp
; NoVLX-NEXT: popq %rbx
; NoVLX-NEXT: popq %r12
; NoVLX-NEXT: popq %r13
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x i32>
%1 = bitcast <8 x i64> %__b to <16 x i32>
%2 = icmp ult <16 x i32> %0, %1
%3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
define zeroext i32 @test_vpcmpultd_v16i1_v32i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpultd_v16i1_v32i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpltud (%rdi), %zmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpultd_v16i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1612:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1613:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1614:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %r13
; NoVLX-NEXT: pushq %r12
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: .Lcfi1615:
; NoVLX-NEXT: .cfi_offset %rbx, -56
; NoVLX-NEXT: .Lcfi1616:
; NoVLX-NEXT: .cfi_offset %r12, -48
; NoVLX-NEXT: .Lcfi1617:
; NoVLX-NEXT: .cfi_offset %r13, -40
; NoVLX-NEXT: .Lcfi1618:
; NoVLX-NEXT: .cfi_offset %r14, -32
; NoVLX-NEXT: .Lcfi1619:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpltud (%rdi), %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: leaq -40(%rbp), %rsp
; NoVLX-NEXT: popq %rbx
; NoVLX-NEXT: popq %r12
; NoVLX-NEXT: popq %r13
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x i32>
%load = load <8 x i64>, <8 x i64>* %__b
%1 = bitcast <8 x i64> %load to <16 x i32>
%2 = icmp ult <16 x i32> %0, %1
%3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
define zeroext i32 @test_masked_vpcmpultd_v16i1_v32i1_mask(i16 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpultd_v16i1_v32i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpultd_v16i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1620:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1621:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1622:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %r13
; NoVLX-NEXT: pushq %r12
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: .Lcfi1623:
; NoVLX-NEXT: .cfi_offset %rbx, -56
; NoVLX-NEXT: .Lcfi1624:
; NoVLX-NEXT: .cfi_offset %r12, -48
; NoVLX-NEXT: .Lcfi1625:
; NoVLX-NEXT: .cfi_offset %r13, -40
; NoVLX-NEXT: .Lcfi1626:
; NoVLX-NEXT: .cfi_offset %r14, -32
; NoVLX-NEXT: .Lcfi1627:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: leaq -40(%rbp), %rsp
; NoVLX-NEXT: popq %rbx
; NoVLX-NEXT: popq %r12
; NoVLX-NEXT: popq %r13
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x i32>
%1 = bitcast <8 x i64> %__b to <16 x i32>
%2 = icmp ult <16 x i32> %0, %1
%3 = bitcast i16 %__u to <16 x i1>
%4 = and <16 x i1> %2, %3
%5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
define zeroext i32 @test_masked_vpcmpultd_v16i1_v32i1_mask_mem(i16 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpultd_v16i1_v32i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpltud (%rsi), %zmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpultd_v16i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1628:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1629:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1630:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %r13
; NoVLX-NEXT: pushq %r12
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: .Lcfi1631:
; NoVLX-NEXT: .cfi_offset %rbx, -56
; NoVLX-NEXT: .Lcfi1632:
; NoVLX-NEXT: .cfi_offset %r12, -48
; NoVLX-NEXT: .Lcfi1633:
; NoVLX-NEXT: .cfi_offset %r13, -40
; NoVLX-NEXT: .Lcfi1634:
; NoVLX-NEXT: .cfi_offset %r14, -32
; NoVLX-NEXT: .Lcfi1635:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpltud (%rsi), %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: leaq -40(%rbp), %rsp
; NoVLX-NEXT: popq %rbx
; NoVLX-NEXT: popq %r12
; NoVLX-NEXT: popq %r13
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x i32>
%load = load <8 x i64>, <8 x i64>* %__b
%1 = bitcast <8 x i64> %load to <16 x i32>
%2 = icmp ult <16 x i32> %0, %1
%3 = bitcast i16 %__u to <16 x i1>
%4 = and <16 x i1> %2, %3
%5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
define zeroext i32 @test_vpcmpultd_v16i1_v32i1_mask_mem_b(<8 x i64> %__a, i32* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpultd_v16i1_v32i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpltud (%rdi){1to16}, %zmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpultd_v16i1_v32i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1636:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1637:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1638:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %r13
; NoVLX-NEXT: pushq %r12
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: .Lcfi1639:
; NoVLX-NEXT: .cfi_offset %rbx, -56
; NoVLX-NEXT: .Lcfi1640:
; NoVLX-NEXT: .cfi_offset %r12, -48
; NoVLX-NEXT: .Lcfi1641:
; NoVLX-NEXT: .cfi_offset %r13, -40
; NoVLX-NEXT: .Lcfi1642:
; NoVLX-NEXT: .cfi_offset %r14, -32
; NoVLX-NEXT: .Lcfi1643:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpltud (%rdi){1to16}, %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: leaq -40(%rbp), %rsp
; NoVLX-NEXT: popq %rbx
; NoVLX-NEXT: popq %r12
; NoVLX-NEXT: popq %r13
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x i32>
%load = load i32, i32* %__b
%vec = insertelement <16 x i32> undef, i32 %load, i32 0
%1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
%2 = icmp ult <16 x i32> %0, %1
%3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
define zeroext i32 @test_masked_vpcmpultd_v16i1_v32i1_mask_mem_b(i16 zeroext %__u, <8 x i64> %__a, i32* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpultd_v16i1_v32i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpltud (%rsi){1to16}, %zmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpultd_v16i1_v32i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1644:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1645:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1646:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %r13
; NoVLX-NEXT: pushq %r12
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: .Lcfi1647:
; NoVLX-NEXT: .cfi_offset %rbx, -56
; NoVLX-NEXT: .Lcfi1648:
; NoVLX-NEXT: .cfi_offset %r12, -48
; NoVLX-NEXT: .Lcfi1649:
; NoVLX-NEXT: .cfi_offset %r13, -40
; NoVLX-NEXT: .Lcfi1650:
; NoVLX-NEXT: .cfi_offset %r14, -32
; NoVLX-NEXT: .Lcfi1651:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpltud (%rsi){1to16}, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: leaq -40(%rbp), %rsp
; NoVLX-NEXT: popq %rbx
; NoVLX-NEXT: popq %r12
; NoVLX-NEXT: popq %r13
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x i32>
%load = load i32, i32* %__b
%vec = insertelement <16 x i32> undef, i32 %load, i32 0
%1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
%2 = icmp ult <16 x i32> %0, %1
%3 = bitcast i16 %__u to <16 x i1>
%4 = and <16 x i1> %3, %2
%5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
define zeroext i64 @test_vpcmpultd_v16i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpultd_v16i1_v64i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpultd_v16i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1652:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1653:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1654:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %r13
; NoVLX-NEXT: pushq %r12
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: .Lcfi1655:
; NoVLX-NEXT: .cfi_offset %rbx, -56
; NoVLX-NEXT: .Lcfi1656:
; NoVLX-NEXT: .cfi_offset %r12, -48
; NoVLX-NEXT: .Lcfi1657:
; NoVLX-NEXT: .cfi_offset %r13, -40
; NoVLX-NEXT: .Lcfi1658:
; NoVLX-NEXT: .cfi_offset %r14, -32
; NoVLX-NEXT: .Lcfi1659:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: leaq -40(%rbp), %rsp
; NoVLX-NEXT: popq %rbx
; NoVLX-NEXT: popq %r12
; NoVLX-NEXT: popq %r13
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x i32>
%1 = bitcast <8 x i64> %__b to <16 x i32>
%2 = icmp ult <16 x i32> %0, %1
%3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
define zeroext i64 @test_vpcmpultd_v16i1_v64i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpultd_v16i1_v64i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpltud (%rdi), %zmm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpultd_v16i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1660:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1661:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1662:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %r13
; NoVLX-NEXT: pushq %r12
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: .Lcfi1663:
; NoVLX-NEXT: .cfi_offset %rbx, -56
; NoVLX-NEXT: .Lcfi1664:
; NoVLX-NEXT: .cfi_offset %r12, -48
; NoVLX-NEXT: .Lcfi1665:
; NoVLX-NEXT: .cfi_offset %r13, -40
; NoVLX-NEXT: .Lcfi1666:
; NoVLX-NEXT: .cfi_offset %r14, -32
; NoVLX-NEXT: .Lcfi1667:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpltud (%rdi), %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: leaq -40(%rbp), %rsp
; NoVLX-NEXT: popq %rbx
; NoVLX-NEXT: popq %r12
; NoVLX-NEXT: popq %r13
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x i32>
%load = load <8 x i64>, <8 x i64>* %__b
%1 = bitcast <8 x i64> %load to <16 x i32>
%2 = icmp ult <16 x i32> %0, %1
%3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
define zeroext i64 @test_masked_vpcmpultd_v16i1_v64i1_mask(i16 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpultd_v16i1_v64i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpultd_v16i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1668:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1669:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1670:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %r13
; NoVLX-NEXT: pushq %r12
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: .Lcfi1671:
; NoVLX-NEXT: .cfi_offset %rbx, -56
; NoVLX-NEXT: .Lcfi1672:
; NoVLX-NEXT: .cfi_offset %r12, -48
; NoVLX-NEXT: .Lcfi1673:
; NoVLX-NEXT: .cfi_offset %r13, -40
; NoVLX-NEXT: .Lcfi1674:
; NoVLX-NEXT: .cfi_offset %r14, -32
; NoVLX-NEXT: .Lcfi1675:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: leaq -40(%rbp), %rsp
; NoVLX-NEXT: popq %rbx
; NoVLX-NEXT: popq %r12
; NoVLX-NEXT: popq %r13
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x i32>
%1 = bitcast <8 x i64> %__b to <16 x i32>
%2 = icmp ult <16 x i32> %0, %1
%3 = bitcast i16 %__u to <16 x i1>
%4 = and <16 x i1> %2, %3
%5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
define zeroext i64 @test_masked_vpcmpultd_v16i1_v64i1_mask_mem(i16 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpultd_v16i1_v64i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpltud (%rsi), %zmm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpultd_v16i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1676:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1677:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1678:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %r13
; NoVLX-NEXT: pushq %r12
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: .Lcfi1679:
; NoVLX-NEXT: .cfi_offset %rbx, -56
; NoVLX-NEXT: .Lcfi1680:
; NoVLX-NEXT: .cfi_offset %r12, -48
; NoVLX-NEXT: .Lcfi1681:
; NoVLX-NEXT: .cfi_offset %r13, -40
; NoVLX-NEXT: .Lcfi1682:
; NoVLX-NEXT: .cfi_offset %r14, -32
; NoVLX-NEXT: .Lcfi1683:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpltud (%rsi), %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: leaq -40(%rbp), %rsp
; NoVLX-NEXT: popq %rbx
; NoVLX-NEXT: popq %r12
; NoVLX-NEXT: popq %r13
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x i32>
%load = load <8 x i64>, <8 x i64>* %__b
%1 = bitcast <8 x i64> %load to <16 x i32>
%2 = icmp ult <16 x i32> %0, %1
%3 = bitcast i16 %__u to <16 x i1>
%4 = and <16 x i1> %2, %3
%5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
define zeroext i64 @test_vpcmpultd_v16i1_v64i1_mask_mem_b(<8 x i64> %__a, i32* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpultd_v16i1_v64i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpltud (%rdi){1to16}, %zmm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpultd_v16i1_v64i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1684:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1685:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1686:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %r13
; NoVLX-NEXT: pushq %r12
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: .Lcfi1687:
; NoVLX-NEXT: .cfi_offset %rbx, -56
; NoVLX-NEXT: .Lcfi1688:
; NoVLX-NEXT: .cfi_offset %r12, -48
; NoVLX-NEXT: .Lcfi1689:
; NoVLX-NEXT: .cfi_offset %r13, -40
; NoVLX-NEXT: .Lcfi1690:
; NoVLX-NEXT: .cfi_offset %r14, -32
; NoVLX-NEXT: .Lcfi1691:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpltud (%rdi){1to16}, %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: leaq -40(%rbp), %rsp
; NoVLX-NEXT: popq %rbx
; NoVLX-NEXT: popq %r12
; NoVLX-NEXT: popq %r13
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x i32>
%load = load i32, i32* %__b
%vec = insertelement <16 x i32> undef, i32 %load, i32 0
%1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
%2 = icmp ult <16 x i32> %0, %1
%3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
define zeroext i64 @test_masked_vpcmpultd_v16i1_v64i1_mask_mem_b(i16 zeroext %__u, <8 x i64> %__a, i32* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpultd_v16i1_v64i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpltud (%rsi){1to16}, %zmm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpultd_v16i1_v64i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1692:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1693:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1694:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %r13
; NoVLX-NEXT: pushq %r12
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: .Lcfi1695:
; NoVLX-NEXT: .cfi_offset %rbx, -56
; NoVLX-NEXT: .Lcfi1696:
; NoVLX-NEXT: .cfi_offset %r12, -48
; NoVLX-NEXT: .Lcfi1697:
; NoVLX-NEXT: .cfi_offset %r13, -40
; NoVLX-NEXT: .Lcfi1698:
; NoVLX-NEXT: .cfi_offset %r14, -32
; NoVLX-NEXT: .Lcfi1699:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpltud (%rsi){1to16}, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: leaq -40(%rbp), %rsp
; NoVLX-NEXT: popq %rbx
; NoVLX-NEXT: popq %r12
; NoVLX-NEXT: popq %r13
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x i32>
%load = load i32, i32* %__b
%vec = insertelement <16 x i32> undef, i32 %load, i32 0
%1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
%2 = icmp ult <16 x i32> %0, %1
%3 = bitcast i16 %__u to <16 x i1>
%4 = and <16 x i1> %3, %2
%5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
define zeroext i4 @test_vpcmpultq_v2i1_v4i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpultq_v2i1_v4i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpltuq %xmm1, %xmm0, %k0
; VLX-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp)
; VLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpultq_v2i1_v4i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0
; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%1 = bitcast <2 x i64> %__b to <2 x i64>
%2 = icmp ult <2 x i64> %0, %1
%3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%4 = bitcast <4 x i1> %3 to i4
ret i4 %4
}
define zeroext i4 @test_vpcmpultq_v2i1_v4i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpultq_v2i1_v4i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpltuq (%rdi), %xmm0, %k0
; VLX-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp)
; VLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpultq_v2i1_v4i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [9223372036854775808,9223372036854775808]
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpxor (%rdi), %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <2 x i64>
%2 = icmp ult <2 x i64> %0, %1
%3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%4 = bitcast <4 x i1> %3 to i4
ret i4 %4
}
define zeroext i4 @test_masked_vpcmpultq_v2i1_v4i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpultq_v2i1_v4i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpltuq %xmm1, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp)
; VLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v4i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0
; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%1 = bitcast <2 x i64> %__b to <2 x i64>
%2 = icmp ult <2 x i64> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
%4 = and <2 x i1> %2, %extract.i
%5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%6 = bitcast <4 x i1> %5 to i4
ret i4 %6
}
define zeroext i4 @test_masked_vpcmpultq_v2i1_v4i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpultq_v2i1_v4i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpltuq (%rsi), %xmm0, %k0 {%k1}
; VLX-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp)
; VLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v4i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [9223372036854775808,9223372036854775808]
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpxor (%rsi), %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <2 x i64>
%2 = icmp ult <2 x i64> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
%4 = and <2 x i1> %2, %extract.i
%5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%6 = bitcast <4 x i1> %5 to i4
ret i4 %6
}
define zeroext i4 @test_vpcmpultq_v2i1_v4i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpultq_v2i1_v4i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpltuq (%rdi){1to2}, %xmm0, %k0
; VLX-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp)
; VLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpultq_v2i1_v4i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1
; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0
; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%load = load i64, i64* %__b
%vec = insertelement <2 x i64> undef, i64 %load, i32 0
%1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
%2 = icmp ult <2 x i64> %0, %1
%3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%4 = bitcast <4 x i1> %3 to i4
ret i4 %4
}
define zeroext i4 @test_masked_vpcmpultq_v2i1_v4i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpultq_v2i1_v4i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpltuq (%rsi){1to2}, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp)
; VLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v4i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1
; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0
; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%load = load i64, i64* %__b
%vec = insertelement <2 x i64> undef, i64 %load, i32 0
%1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
%2 = icmp ult <2 x i64> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
%4 = and <2 x i1> %extract.i, %2
%5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%6 = bitcast <4 x i1> %5 to i4
ret i4 %6
}
define zeroext i8 @test_vpcmpultq_v2i1_v8i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpultq_v2i1_v8i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpltuq %xmm1, %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpultq_v2i1_v8i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0
; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,8,2,3,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%1 = bitcast <2 x i64> %__b to <2 x i64>
%2 = icmp ult <2 x i64> %0, %1
%3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%4 = bitcast <8 x i1> %3 to i8
ret i8 %4
}
define zeroext i8 @test_vpcmpultq_v2i1_v8i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpultq_v2i1_v8i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpltuq (%rdi), %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpultq_v2i1_v8i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [9223372036854775808,9223372036854775808]
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpxor (%rdi), %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,8,2,3,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <2 x i64>
%2 = icmp ult <2 x i64> %0, %1
%3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%4 = bitcast <8 x i1> %3 to i8
ret i8 %4
}
define zeroext i8 @test_masked_vpcmpultq_v2i1_v8i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpultq_v2i1_v8i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpltuq %xmm1, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v8i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0
; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,8,2,3,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%1 = bitcast <2 x i64> %__b to <2 x i64>
%2 = icmp ult <2 x i64> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
%4 = and <2 x i1> %2, %extract.i
%5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%6 = bitcast <8 x i1> %5 to i8
ret i8 %6
}
define zeroext i8 @test_masked_vpcmpultq_v2i1_v8i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpultq_v2i1_v8i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpltuq (%rsi), %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v8i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [9223372036854775808,9223372036854775808]
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpxor (%rsi), %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,8,2,3,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <2 x i64>
%2 = icmp ult <2 x i64> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
%4 = and <2 x i1> %2, %extract.i
%5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%6 = bitcast <8 x i1> %5 to i8
ret i8 %6
}
define zeroext i8 @test_vpcmpultq_v2i1_v8i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpultq_v2i1_v8i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpltuq (%rdi){1to2}, %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpultq_v2i1_v8i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1
; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0
; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,8,2,3,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%load = load i64, i64* %__b
%vec = insertelement <2 x i64> undef, i64 %load, i32 0
%1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
%2 = icmp ult <2 x i64> %0, %1
%3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%4 = bitcast <8 x i1> %3 to i8
ret i8 %4
}
define zeroext i8 @test_masked_vpcmpultq_v2i1_v8i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpultq_v2i1_v8i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpltuq (%rsi){1to2}, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v8i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1
; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0
; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,8,2,3,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%load = load i64, i64* %__b
%vec = insertelement <2 x i64> undef, i64 %load, i32 0
%1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
%2 = icmp ult <2 x i64> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
%4 = and <2 x i1> %extract.i, %2
%5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%6 = bitcast <8 x i1> %5 to i8
ret i8 %6
}
define zeroext i16 @test_vpcmpultq_v2i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpultq_v2i1_v16i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpltuq %xmm1, %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpultq_v2i1_v16i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0
; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm1, %zmm0, %zmm2
; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%1 = bitcast <2 x i64> %__b to <2 x i64>
%2 = icmp ult <2 x i64> %0, %1
%3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%4 = bitcast <16 x i1> %3 to i16
ret i16 %4
}
define zeroext i16 @test_vpcmpultq_v2i1_v16i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpultq_v2i1_v16i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpltuq (%rdi), %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpultq_v2i1_v16i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [9223372036854775808,9223372036854775808]
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpxor (%rdi), %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm1, %zmm0, %zmm2
; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <2 x i64>
%2 = icmp ult <2 x i64> %0, %1
%3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%4 = bitcast <16 x i1> %3 to i16
ret i16 %4
}
define zeroext i16 @test_masked_vpcmpultq_v2i1_v16i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpultq_v2i1_v16i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpltuq %xmm1, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v16i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0
; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm1, %zmm0, %zmm2
; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%1 = bitcast <2 x i64> %__b to <2 x i64>
%2 = icmp ult <2 x i64> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
%4 = and <2 x i1> %2, %extract.i
%5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%6 = bitcast <16 x i1> %5 to i16
ret i16 %6
}
define zeroext i16 @test_masked_vpcmpultq_v2i1_v16i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpultq_v2i1_v16i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpltuq (%rsi), %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v16i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [9223372036854775808,9223372036854775808]
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpxor (%rsi), %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm1, %zmm0, %zmm2
; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <2 x i64>
%2 = icmp ult <2 x i64> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
%4 = and <2 x i1> %2, %extract.i
%5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%6 = bitcast <16 x i1> %5 to i16
ret i16 %6
}
define zeroext i16 @test_vpcmpultq_v2i1_v16i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpultq_v2i1_v16i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpltuq (%rdi){1to2}, %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpultq_v2i1_v16i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1
; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0
; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm1, %zmm0, %zmm2
; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%load = load i64, i64* %__b
%vec = insertelement <2 x i64> undef, i64 %load, i32 0
%1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
%2 = icmp ult <2 x i64> %0, %1
%3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%4 = bitcast <16 x i1> %3 to i16
ret i16 %4
}
define zeroext i16 @test_masked_vpcmpultq_v2i1_v16i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpultq_v2i1_v16i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpltuq (%rsi){1to2}, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v16i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1
; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0
; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm1, %zmm0, %zmm2
; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%load = load i64, i64* %__b
%vec = insertelement <2 x i64> undef, i64 %load, i32 0
%1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
%2 = icmp ult <2 x i64> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
%4 = and <2 x i1> %extract.i, %2
%5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%6 = bitcast <16 x i1> %5 to i16
ret i16 %6
}
define zeroext i32 @test_vpcmpultq_v2i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpultq_v2i1_v32i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpltuq %xmm1, %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpultq_v2i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1700:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1701:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1702:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0
; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%1 = bitcast <2 x i64> %__b to <2 x i64>
%2 = icmp ult <2 x i64> %0, %1
%3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
define zeroext i32 @test_vpcmpultq_v2i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpultq_v2i1_v32i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpltuq (%rdi), %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpultq_v2i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1703:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1704:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1705:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [9223372036854775808,9223372036854775808]
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpxor (%rdi), %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <2 x i64>
%2 = icmp ult <2 x i64> %0, %1
%3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
define zeroext i32 @test_masked_vpcmpultq_v2i1_v32i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpultq_v2i1_v32i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpltuq %xmm1, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1706:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1707:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1708:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0
; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%1 = bitcast <2 x i64> %__b to <2 x i64>
%2 = icmp ult <2 x i64> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
%4 = and <2 x i1> %2, %extract.i
%5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
define zeroext i32 @test_masked_vpcmpultq_v2i1_v32i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpultq_v2i1_v32i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpltuq (%rsi), %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1709:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1710:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1711:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [9223372036854775808,9223372036854775808]
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpxor (%rsi), %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <2 x i64>
%2 = icmp ult <2 x i64> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
%4 = and <2 x i1> %2, %extract.i
%5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
define zeroext i32 @test_vpcmpultq_v2i1_v32i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpultq_v2i1_v32i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpltuq (%rdi){1to2}, %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpultq_v2i1_v32i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1712:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1713:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1714:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1
; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0
; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%load = load i64, i64* %__b
%vec = insertelement <2 x i64> undef, i64 %load, i32 0
%1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
%2 = icmp ult <2 x i64> %0, %1
%3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
define zeroext i32 @test_masked_vpcmpultq_v2i1_v32i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpultq_v2i1_v32i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpltuq (%rsi){1to2}, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v32i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1715:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1716:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1717:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1
; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0
; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%load = load i64, i64* %__b
%vec = insertelement <2 x i64> undef, i64 %load, i32 0
%1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
%2 = icmp ult <2 x i64> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
%4 = and <2 x i1> %extract.i, %2
%5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
define zeroext i64 @test_vpcmpultq_v2i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpultq_v2i1_v64i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpltuq %xmm1, %xmm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpultq_v2i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1718:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1719:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1720:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0
; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%1 = bitcast <2 x i64> %__b to <2 x i64>
%2 = icmp ult <2 x i64> %0, %1
%3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
define zeroext i64 @test_vpcmpultq_v2i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpultq_v2i1_v64i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpltuq (%rdi), %xmm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpultq_v2i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1721:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1722:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1723:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [9223372036854775808,9223372036854775808]
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpxor (%rdi), %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <2 x i64>
%2 = icmp ult <2 x i64> %0, %1
%3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
define zeroext i64 @test_masked_vpcmpultq_v2i1_v64i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpultq_v2i1_v64i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpltuq %xmm1, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1724:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1725:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1726:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0
; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%1 = bitcast <2 x i64> %__b to <2 x i64>
%2 = icmp ult <2 x i64> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
%4 = and <2 x i1> %2, %extract.i
%5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
define zeroext i64 @test_masked_vpcmpultq_v2i1_v64i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpultq_v2i1_v64i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpltuq (%rsi), %xmm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1727:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1728:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1729:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [9223372036854775808,9223372036854775808]
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpxor (%rsi), %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <2 x i64>
%2 = icmp ult <2 x i64> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
%4 = and <2 x i1> %2, %extract.i
%5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
define zeroext i64 @test_vpcmpultq_v2i1_v64i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpultq_v2i1_v64i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpltuq (%rdi){1to2}, %xmm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpultq_v2i1_v64i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1730:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1731:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1732:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1
; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0
; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%load = load i64, i64* %__b
%vec = insertelement <2 x i64> undef, i64 %load, i32 0
%1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
%2 = icmp ult <2 x i64> %0, %1
%3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
define zeroext i64 @test_masked_vpcmpultq_v2i1_v64i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpultq_v2i1_v64i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpltuq (%rsi){1to2}, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v64i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1733:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1734:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1735:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1
; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0
; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
%load = load i64, i64* %__b
%vec = insertelement <2 x i64> undef, i64 %load, i32 0
%1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
%2 = icmp ult <2 x i64> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
%4 = and <2 x i1> %extract.i, %2
%5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
define zeroext i8 @test_vpcmpultq_v4i1_v8i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpultq_v4i1_v8i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpltuq %ymm1, %ymm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpultq_v4i1_v8i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0
; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1
; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
%1 = bitcast <4 x i64> %__b to <4 x i64>
%2 = icmp ult <4 x i64> %0, %1
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <8 x i1> %3 to i8
ret i8 %4
}
define zeroext i8 @test_vpcmpultq_v4i1_v8i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpultq_v4i1_v8i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpltuq (%rdi), %ymm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpultq_v4i1_v8i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpxor (%rdi), %ymm1, %ymm1
; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
%load = load <4 x i64>, <4 x i64>* %__b
%1 = bitcast <4 x i64> %load to <4 x i64>
%2 = icmp ult <4 x i64> %0, %1
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <8 x i1> %3 to i8
ret i8 %4
}
define zeroext i8 @test_masked_vpcmpultq_v4i1_v8i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpultq_v4i1_v8i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpltuq %ymm1, %ymm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpultq_v4i1_v8i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0
; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1
; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
; NoVLX-NEXT: kshiftrw $15, %k3, %k3
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k3, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
%1 = bitcast <4 x i64> %__b to <4 x i64>
%2 = icmp ult <4 x i64> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%4 = and <4 x i1> %2, %extract.i
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <8 x i1> %5 to i8
ret i8 %6
}
define zeroext i8 @test_masked_vpcmpultq_v4i1_v8i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpultq_v4i1_v8i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpltuq (%rsi), %ymm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpultq_v4i1_v8i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpxor (%rsi), %ymm1, %ymm1
; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
; NoVLX-NEXT: kshiftrw $15, %k3, %k3
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k3, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
%load = load <4 x i64>, <4 x i64>* %__b
%1 = bitcast <4 x i64> %load to <4 x i64>
%2 = icmp ult <4 x i64> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%4 = and <4 x i1> %2, %extract.i
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <8 x i1> %5 to i8
ret i8 %6
}
define zeroext i8 @test_vpcmpultq_v4i1_v8i1_mask_mem_b(<4 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpultq_v4i1_v8i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpltuq (%rdi){1to4}, %ymm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpultq_v4i1_v8i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1
; NoVLX-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0
; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1
; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
%load = load i64, i64* %__b
%vec = insertelement <4 x i64> undef, i64 %load, i32 0
%1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
%2 = icmp ult <4 x i64> %0, %1
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <8 x i1> %3 to i8
ret i8 %4
}
define zeroext i8 @test_masked_vpcmpultq_v4i1_v8i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpultq_v4i1_v8i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpltuq (%rsi){1to4}, %ymm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpultq_v4i1_v8i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1
; NoVLX-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0
; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1
; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
; NoVLX-NEXT: kshiftrw $15, %k3, %k3
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k3, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
%load = load i64, i64* %__b
%vec = insertelement <4 x i64> undef, i64 %load, i32 0
%1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
%2 = icmp ult <4 x i64> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%4 = and <4 x i1> %extract.i, %2
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <8 x i1> %5 to i8
ret i8 %6
}
define zeroext i16 @test_vpcmpultq_v4i1_v16i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpultq_v4i1_v16i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpltuq %ymm1, %ymm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpultq_v4i1_v16i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0
; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1
; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
%1 = bitcast <4 x i64> %__b to <4 x i64>
%2 = icmp ult <4 x i64> %0, %1
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <16 x i1> %3 to i16
ret i16 %4
}
define zeroext i16 @test_vpcmpultq_v4i1_v16i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpultq_v4i1_v16i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpltuq (%rdi), %ymm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpultq_v4i1_v16i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpxor (%rdi), %ymm1, %ymm1
; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
%load = load <4 x i64>, <4 x i64>* %__b
%1 = bitcast <4 x i64> %load to <4 x i64>
%2 = icmp ult <4 x i64> %0, %1
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <16 x i1> %3 to i16
ret i16 %4
}
define zeroext i16 @test_masked_vpcmpultq_v4i1_v16i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpultq_v4i1_v16i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpltuq %ymm1, %ymm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpultq_v4i1_v16i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0
; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1
; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
; NoVLX-NEXT: kshiftrw $15, %k3, %k3
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k3, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
%1 = bitcast <4 x i64> %__b to <4 x i64>
%2 = icmp ult <4 x i64> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%4 = and <4 x i1> %2, %extract.i
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <16 x i1> %5 to i16
ret i16 %6
}
define zeroext i16 @test_masked_vpcmpultq_v4i1_v16i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpultq_v4i1_v16i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpltuq (%rsi), %ymm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpultq_v4i1_v16i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpxor (%rsi), %ymm1, %ymm1
; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
; NoVLX-NEXT: kshiftrw $15, %k3, %k3
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k3, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
%load = load <4 x i64>, <4 x i64>* %__b
%1 = bitcast <4 x i64> %load to <4 x i64>
%2 = icmp ult <4 x i64> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%4 = and <4 x i1> %2, %extract.i
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <16 x i1> %5 to i16
ret i16 %6
}
define zeroext i16 @test_vpcmpultq_v4i1_v16i1_mask_mem_b(<4 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpultq_v4i1_v16i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpltuq (%rdi){1to4}, %ymm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpultq_v4i1_v16i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1
; NoVLX-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0
; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1
; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
%load = load i64, i64* %__b
%vec = insertelement <4 x i64> undef, i64 %load, i32 0
%1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
%2 = icmp ult <4 x i64> %0, %1
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <16 x i1> %3 to i16
ret i16 %4
}
define zeroext i16 @test_masked_vpcmpultq_v4i1_v16i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpultq_v4i1_v16i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpltuq (%rsi){1to4}, %ymm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpultq_v4i1_v16i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1
; NoVLX-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0
; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1
; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
; NoVLX-NEXT: kshiftrw $15, %k3, %k3
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k3, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
%load = load i64, i64* %__b
%vec = insertelement <4 x i64> undef, i64 %load, i32 0
%1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
%2 = icmp ult <4 x i64> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%4 = and <4 x i1> %extract.i, %2
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <16 x i1> %5 to i16
ret i16 %6
}
define zeroext i32 @test_vpcmpultq_v4i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpultq_v4i1_v32i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpltuq %ymm1, %ymm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpultq_v4i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1736:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1737:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1738:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0
; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1
; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
%1 = bitcast <4 x i64> %__b to <4 x i64>
%2 = icmp ult <4 x i64> %0, %1
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
define zeroext i32 @test_vpcmpultq_v4i1_v32i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpultq_v4i1_v32i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpltuq (%rdi), %ymm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpultq_v4i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1739:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1740:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1741:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpxor (%rdi), %ymm1, %ymm1
; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
%load = load <4 x i64>, <4 x i64>* %__b
%1 = bitcast <4 x i64> %load to <4 x i64>
%2 = icmp ult <4 x i64> %0, %1
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
define zeroext i32 @test_masked_vpcmpultq_v4i1_v32i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpultq_v4i1_v32i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpltuq %ymm1, %ymm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpultq_v4i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1742:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1743:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1744:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0
; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1
; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
; NoVLX-NEXT: kshiftrw $15, %k3, %k3
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k3, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
%1 = bitcast <4 x i64> %__b to <4 x i64>
%2 = icmp ult <4 x i64> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%4 = and <4 x i1> %2, %extract.i
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
define zeroext i32 @test_masked_vpcmpultq_v4i1_v32i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpultq_v4i1_v32i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpltuq (%rsi), %ymm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpultq_v4i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1745:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1746:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1747:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpxor (%rsi), %ymm1, %ymm1
; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
; NoVLX-NEXT: kshiftrw $15, %k3, %k3
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k3, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
%load = load <4 x i64>, <4 x i64>* %__b
%1 = bitcast <4 x i64> %load to <4 x i64>
%2 = icmp ult <4 x i64> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%4 = and <4 x i1> %2, %extract.i
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
define zeroext i32 @test_vpcmpultq_v4i1_v32i1_mask_mem_b(<4 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpultq_v4i1_v32i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpltuq (%rdi){1to4}, %ymm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpultq_v4i1_v32i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1748:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1749:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1750:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1
; NoVLX-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0
; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1
; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
%load = load i64, i64* %__b
%vec = insertelement <4 x i64> undef, i64 %load, i32 0
%1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
%2 = icmp ult <4 x i64> %0, %1
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
define zeroext i32 @test_masked_vpcmpultq_v4i1_v32i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpultq_v4i1_v32i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpltuq (%rsi){1to4}, %ymm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpultq_v4i1_v32i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1751:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1752:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1753:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1
; NoVLX-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0
; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1
; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
; NoVLX-NEXT: kshiftrw $15, %k3, %k3
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k3, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
%load = load i64, i64* %__b
%vec = insertelement <4 x i64> undef, i64 %load, i32 0
%1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
%2 = icmp ult <4 x i64> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%4 = and <4 x i1> %extract.i, %2
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
define zeroext i64 @test_vpcmpultq_v4i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpultq_v4i1_v64i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpltuq %ymm1, %ymm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpultq_v4i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1754:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1755:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1756:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0
; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1
; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
%1 = bitcast <4 x i64> %__b to <4 x i64>
%2 = icmp ult <4 x i64> %0, %1
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
define zeroext i64 @test_vpcmpultq_v4i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpultq_v4i1_v64i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpltuq (%rdi), %ymm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpultq_v4i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1757:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1758:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1759:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpxor (%rdi), %ymm1, %ymm1
; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
%load = load <4 x i64>, <4 x i64>* %__b
%1 = bitcast <4 x i64> %load to <4 x i64>
%2 = icmp ult <4 x i64> %0, %1
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
define zeroext i64 @test_masked_vpcmpultq_v4i1_v64i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpultq_v4i1_v64i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpltuq %ymm1, %ymm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpultq_v4i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1760:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1761:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1762:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0
; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1
; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
; NoVLX-NEXT: kshiftrw $15, %k3, %k3
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k3, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
%1 = bitcast <4 x i64> %__b to <4 x i64>
%2 = icmp ult <4 x i64> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%4 = and <4 x i1> %2, %extract.i
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
define zeroext i64 @test_masked_vpcmpultq_v4i1_v64i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpultq_v4i1_v64i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpltuq (%rsi), %ymm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpultq_v4i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1763:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1764:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1765:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpxor (%rsi), %ymm1, %ymm1
; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
; NoVLX-NEXT: kshiftrw $15, %k3, %k3
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k3, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
%load = load <4 x i64>, <4 x i64>* %__b
%1 = bitcast <4 x i64> %load to <4 x i64>
%2 = icmp ult <4 x i64> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%4 = and <4 x i1> %2, %extract.i
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
define zeroext i64 @test_vpcmpultq_v4i1_v64i1_mask_mem_b(<4 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpultq_v4i1_v64i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpltuq (%rdi){1to4}, %ymm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpultq_v4i1_v64i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1766:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1767:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1768:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1
; NoVLX-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0
; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1
; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
%load = load i64, i64* %__b
%vec = insertelement <4 x i64> undef, i64 %load, i32 0
%1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
%2 = icmp ult <4 x i64> %0, %1
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
define zeroext i64 @test_masked_vpcmpultq_v4i1_v64i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpultq_v4i1_v64i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpltuq (%rsi){1to4}, %ymm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpultq_v4i1_v64i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1769:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1770:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1771:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1
; NoVLX-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0
; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1
; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
; NoVLX-NEXT: kshiftrw $15, %k3, %k3
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k3, %ecx
; NoVLX-NEXT: vmovd %ecx, %xmm1
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
%load = load i64, i64* %__b
%vec = insertelement <4 x i64> undef, i64 %load, i32 0
%1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
%2 = icmp ult <4 x i64> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%4 = and <4 x i1> %extract.i, %2
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
define zeroext i16 @test_vpcmpultq_v8i1_v16i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpultq_v8i1_v16i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpultq_v8i1_v16i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
%1 = bitcast <8 x i64> %__b to <8 x i64>
%2 = icmp ult <8 x i64> %0, %1
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%4 = bitcast <16 x i1> %3 to i16
ret i16 %4
}
define zeroext i16 @test_vpcmpultq_v8i1_v16i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpultq_v8i1_v16i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpltuq (%rdi), %zmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpultq_v8i1_v16i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpcmpltuq (%rdi), %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
%load = load <8 x i64>, <8 x i64>* %__b
%1 = bitcast <8 x i64> %load to <8 x i64>
%2 = icmp ult <8 x i64> %0, %1
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%4 = bitcast <16 x i1> %3 to i16
ret i16 %4
}
define zeroext i16 @test_masked_vpcmpultq_v8i1_v16i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpultq_v8i1_v16i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpultq_v8i1_v16i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
%1 = bitcast <8 x i64> %__b to <8 x i64>
%2 = icmp ult <8 x i64> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %2, %3
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%6 = bitcast <16 x i1> %5 to i16
ret i16 %6
}
define zeroext i16 @test_masked_vpcmpultq_v8i1_v16i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpultq_v8i1_v16i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpltuq (%rsi), %zmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpultq_v8i1_v16i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpltuq (%rsi), %zmm0, %k0 {%k1}
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
%load = load <8 x i64>, <8 x i64>* %__b
%1 = bitcast <8 x i64> %load to <8 x i64>
%2 = icmp ult <8 x i64> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %2, %3
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%6 = bitcast <16 x i1> %5 to i16
ret i16 %6
}
define zeroext i16 @test_vpcmpultq_v8i1_v16i1_mask_mem_b(<8 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpultq_v8i1_v16i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpltuq (%rdi){1to8}, %zmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpultq_v8i1_v16i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpcmpltuq (%rdi){1to8}, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
%load = load i64, i64* %__b
%vec = insertelement <8 x i64> undef, i64 %load, i32 0
%1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
%2 = icmp ult <8 x i64> %0, %1
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%4 = bitcast <16 x i1> %3 to i16
ret i16 %4
}
define zeroext i16 @test_masked_vpcmpultq_v8i1_v16i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpultq_v8i1_v16i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpltuq (%rsi){1to8}, %zmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpultq_v8i1_v16i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpltuq (%rsi){1to8}, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
%load = load i64, i64* %__b
%vec = insertelement <8 x i64> undef, i64 %load, i32 0
%1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
%2 = icmp ult <8 x i64> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %3, %2
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%6 = bitcast <16 x i1> %5 to i16
ret i16 %6
}
define zeroext i32 @test_vpcmpultq_v8i1_v32i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpultq_v8i1_v32i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpultq_v8i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1772:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1773:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1774:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
%1 = bitcast <8 x i64> %__b to <8 x i64>
%2 = icmp ult <8 x i64> %0, %1
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
define zeroext i32 @test_vpcmpultq_v8i1_v32i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpultq_v8i1_v32i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpltuq (%rdi), %zmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpultq_v8i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1775:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1776:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1777:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpcmpltuq (%rdi), %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
%load = load <8 x i64>, <8 x i64>* %__b
%1 = bitcast <8 x i64> %load to <8 x i64>
%2 = icmp ult <8 x i64> %0, %1
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
define zeroext i32 @test_masked_vpcmpultq_v8i1_v32i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpultq_v8i1_v32i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpultq_v8i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1778:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1779:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1780:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
%1 = bitcast <8 x i64> %__b to <8 x i64>
%2 = icmp ult <8 x i64> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %2, %3
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
define zeroext i32 @test_masked_vpcmpultq_v8i1_v32i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpultq_v8i1_v32i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpltuq (%rsi), %zmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpultq_v8i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1781:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1782:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1783:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpltuq (%rsi), %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
%load = load <8 x i64>, <8 x i64>* %__b
%1 = bitcast <8 x i64> %load to <8 x i64>
%2 = icmp ult <8 x i64> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %2, %3
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
define zeroext i32 @test_vpcmpultq_v8i1_v32i1_mask_mem_b(<8 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpultq_v8i1_v32i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpltuq (%rdi){1to8}, %zmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpultq_v8i1_v32i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1784:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1785:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1786:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpcmpltuq (%rdi){1to8}, %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
%load = load i64, i64* %__b
%vec = insertelement <8 x i64> undef, i64 %load, i32 0
%1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
%2 = icmp ult <8 x i64> %0, %1
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
define zeroext i32 @test_masked_vpcmpultq_v8i1_v32i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpultq_v8i1_v32i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpltuq (%rsi){1to8}, %zmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpultq_v8i1_v32i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1787:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1788:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1789:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpltuq (%rsi){1to8}, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
%load = load i64, i64* %__b
%vec = insertelement <8 x i64> undef, i64 %load, i32 0
%1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
%2 = icmp ult <8 x i64> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %3, %2
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
define zeroext i64 @test_vpcmpultq_v8i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpultq_v8i1_v64i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpultq_v8i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1790:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1791:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1792:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
%1 = bitcast <8 x i64> %__b to <8 x i64>
%2 = icmp ult <8 x i64> %0, %1
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
define zeroext i64 @test_vpcmpultq_v8i1_v64i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpultq_v8i1_v64i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpltuq (%rdi), %zmm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpultq_v8i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1793:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1794:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1795:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpltuq (%rdi), %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
%load = load <8 x i64>, <8 x i64>* %__b
%1 = bitcast <8 x i64> %load to <8 x i64>
%2 = icmp ult <8 x i64> %0, %1
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
define zeroext i64 @test_masked_vpcmpultq_v8i1_v64i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpultq_v8i1_v64i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpultq_v8i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1796:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1797:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1798:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
%1 = bitcast <8 x i64> %__b to <8 x i64>
%2 = icmp ult <8 x i64> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %2, %3
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
define zeroext i64 @test_masked_vpcmpultq_v8i1_v64i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpultq_v8i1_v64i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpltuq (%rsi), %zmm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpultq_v8i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1799:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1800:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1801:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpltuq (%rsi), %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
%load = load <8 x i64>, <8 x i64>* %__b
%1 = bitcast <8 x i64> %load to <8 x i64>
%2 = icmp ult <8 x i64> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %2, %3
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
define zeroext i64 @test_vpcmpultq_v8i1_v64i1_mask_mem_b(<8 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpultq_v8i1_v64i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: vpcmpltuq (%rdi){1to8}, %zmm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpultq_v8i1_v64i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1802:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1803:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1804:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpltuq (%rdi){1to8}, %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
%load = load i64, i64* %__b
%vec = insertelement <8 x i64> undef, i64 %load, i32 0
%1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
%2 = icmp ult <8 x i64> %0, %1
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
define zeroext i64 @test_masked_vpcmpultq_v8i1_v64i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpultq_v8i1_v64i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpltuq (%rsi){1to8}, %zmm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpultq_v8i1_v64i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1805:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1806:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1807:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpltuq (%rsi){1to8}, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
%load = load i64, i64* %__b
%vec = insertelement <8 x i64> undef, i64 %load, i32 0
%1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
%2 = icmp ult <8 x i64> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %3, %2
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
declare i16 @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> , <16 x float> , i32, i16, i32)
define zeroext i8 @test_vcmpoeqps_v4i1_v8i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vcmpoeqps_v4i1_v8i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: vcmpeqps %xmm1, %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vcmpoeqps_v4i1_v8i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vcmpeqps %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x float>
%1 = bitcast <2 x i64> %__b to <4 x float>
%2 = fcmp oeq <4 x float> %0, %1
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <8 x i1> %3 to i8
ret i8 %4
}
define zeroext i8 @test_vcmpoeqps_v4i1_v8i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vcmpoeqps_v4i1_v8i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: vcmpeqps (%rdi), %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vcmpoeqps_v4i1_v8i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vcmpeqps (%rdi), %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x float>
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <4 x float>
%2 = fcmp oeq <4 x float> %0, %1
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <8 x i1> %3 to i8
ret i8 %4
}
define zeroext i8 @test_vcmpoeqps_v4i1_v8i1_mask_mem_b(<2 x i64> %__a, float* %__b) local_unnamed_addr {
; VLX-LABEL: test_vcmpoeqps_v4i1_v8i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: vcmpeqps (%rdi){1to4}, %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vcmpoeqps_v4i1_v8i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vbroadcastss (%rdi), %xmm1
; NoVLX-NEXT: vcmpeqps %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x float>
%load = load float, float* %__b
%vec = insertelement <4 x float> undef, float %load, i32 0
%1 = shufflevector <4 x float> %vec, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
%2 = fcmp oeq <4 x float> %0, %1
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <8 x i1> %3 to i8
ret i8 %4
}
define zeroext i8 @test_masked_vcmpoeqps_v4i1_v8i1_mask(i4 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqps_v4i1_v8i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
; VLX-NEXT: kmovb -{{[0-9]+}}(%rsp), %k1
; VLX-NEXT: vcmpeqps %xmm1, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vcmpoeqps_v4i1_v8i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vpmovqd %zmm2, %ymm2
; NoVLX-NEXT: vcmpeqps %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vandps %xmm2, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x float>
%1 = bitcast <2 x i64> %__b to <4 x float>
%2 = fcmp oeq <4 x float> %0, %1
%3 = bitcast i4 %__u to <4 x i1>
%4 = and <4 x i1> %2, %3
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <8 x i1> %5 to i8
ret i8 %6
}
define zeroext i8 @test_masked_vcmpoeqps_v4i1_v8i1_mask_mem(i4 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqps_v4i1_v8i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
; VLX-NEXT: kmovb -{{[0-9]+}}(%rsp), %k1
; VLX-NEXT: vcmpeqps (%rsi), %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vcmpoeqps_v4i1_v8i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpmovqd %zmm1, %ymm1
; NoVLX-NEXT: vcmpeqps (%rsi), %xmm0, %xmm0
; NoVLX-NEXT: vandps %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x float>
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <4 x float>
%2 = fcmp oeq <4 x float> %0, %1
%3 = bitcast i4 %__u to <4 x i1>
%4 = and <4 x i1> %2, %3
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <8 x i1> %5 to i8
ret i8 %6
}
define zeroext i8 @test_masked_vcmpoeqps_v4i1_v8i1_mask_mem_b(i4 zeroext %__u, <2 x i64> %__a, float* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqps_v4i1_v8i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
; VLX-NEXT: kmovb -{{[0-9]+}}(%rsp), %k1
; VLX-NEXT: vcmpeqps (%rsi){1to4}, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vcmpoeqps_v4i1_v8i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpmovqd %zmm1, %ymm1
; NoVLX-NEXT: vbroadcastss (%rsi), %xmm2
; NoVLX-NEXT: vcmpeqps %xmm2, %xmm0, %xmm0
; NoVLX-NEXT: vandps %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x float>
%load = load float, float* %__b
%vec = insertelement <4 x float> undef, float %load, i32 0
%1 = shufflevector <4 x float> %vec, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
%2 = fcmp oeq <4 x float> %0, %1
%3 = bitcast i4 %__u to <4 x i1>
%4 = and <4 x i1> %2, %3
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <8 x i1> %5 to i8
ret i8 %6
}
define zeroext i16 @test_vcmpoeqps_v4i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vcmpoeqps_v4i1_v16i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: vcmpeqps %xmm1, %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vcmpoeqps_v4i1_v16i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vcmpeqps %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x float>
%1 = bitcast <2 x i64> %__b to <4 x float>
%2 = fcmp oeq <4 x float> %0, %1
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <16 x i1> %3 to i16
ret i16 %4
}
define zeroext i16 @test_vcmpoeqps_v4i1_v16i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vcmpoeqps_v4i1_v16i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: vcmpeqps (%rdi), %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vcmpoeqps_v4i1_v16i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vcmpeqps (%rdi), %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x float>
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <4 x float>
%2 = fcmp oeq <4 x float> %0, %1
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <16 x i1> %3 to i16
ret i16 %4
}
define zeroext i16 @test_vcmpoeqps_v4i1_v16i1_mask_mem_b(<2 x i64> %__a, float* %__b) local_unnamed_addr {
; VLX-LABEL: test_vcmpoeqps_v4i1_v16i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: vcmpeqps (%rdi){1to4}, %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vcmpoeqps_v4i1_v16i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vbroadcastss (%rdi), %xmm1
; NoVLX-NEXT: vcmpeqps %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x float>
%load = load float, float* %__b
%vec = insertelement <4 x float> undef, float %load, i32 0
%1 = shufflevector <4 x float> %vec, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
%2 = fcmp oeq <4 x float> %0, %1
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <16 x i1> %3 to i16
ret i16 %4
}
define zeroext i16 @test_masked_vcmpoeqps_v4i1_v16i1_mask(i4 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqps_v4i1_v16i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
; VLX-NEXT: kmovb -{{[0-9]+}}(%rsp), %k1
; VLX-NEXT: vcmpeqps %xmm1, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vcmpoeqps_v4i1_v16i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vpmovqd %zmm2, %ymm2
; NoVLX-NEXT: vcmpeqps %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vandps %xmm2, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x float>
%1 = bitcast <2 x i64> %__b to <4 x float>
%2 = fcmp oeq <4 x float> %0, %1
%3 = bitcast i4 %__u to <4 x i1>
%4 = and <4 x i1> %2, %3
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <16 x i1> %5 to i16
ret i16 %6
}
define zeroext i16 @test_masked_vcmpoeqps_v4i1_v16i1_mask_mem(i4 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqps_v4i1_v16i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
; VLX-NEXT: kmovb -{{[0-9]+}}(%rsp), %k1
; VLX-NEXT: vcmpeqps (%rsi), %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vcmpoeqps_v4i1_v16i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpmovqd %zmm1, %ymm1
; NoVLX-NEXT: vcmpeqps (%rsi), %xmm0, %xmm0
; NoVLX-NEXT: vandps %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x float>
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <4 x float>
%2 = fcmp oeq <4 x float> %0, %1
%3 = bitcast i4 %__u to <4 x i1>
%4 = and <4 x i1> %2, %3
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <16 x i1> %5 to i16
ret i16 %6
}
define zeroext i16 @test_masked_vcmpoeqps_v4i1_v16i1_mask_mem_b(i4 zeroext %__u, <2 x i64> %__a, float* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqps_v4i1_v16i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
; VLX-NEXT: kmovb -{{[0-9]+}}(%rsp), %k1
; VLX-NEXT: vcmpeqps (%rsi){1to4}, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vcmpoeqps_v4i1_v16i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpmovqd %zmm1, %ymm1
; NoVLX-NEXT: vbroadcastss (%rsi), %xmm2
; NoVLX-NEXT: vcmpeqps %xmm2, %xmm0, %xmm0
; NoVLX-NEXT: vandps %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x float>
%load = load float, float* %__b
%vec = insertelement <4 x float> undef, float %load, i32 0
%1 = shufflevector <4 x float> %vec, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
%2 = fcmp oeq <4 x float> %0, %1
%3 = bitcast i4 %__u to <4 x i1>
%4 = and <4 x i1> %2, %3
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <16 x i1> %5 to i16
ret i16 %6
}
define zeroext i32 @test_vcmpoeqps_v4i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vcmpoeqps_v4i1_v32i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: vcmpeqps %xmm1, %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vcmpoeqps_v4i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1808:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1809:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1810:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vcmpeqps %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x float>
%1 = bitcast <2 x i64> %__b to <4 x float>
%2 = fcmp oeq <4 x float> %0, %1
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
define zeroext i32 @test_vcmpoeqps_v4i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vcmpoeqps_v4i1_v32i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: vcmpeqps (%rdi), %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vcmpoeqps_v4i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1811:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1812:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1813:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vcmpeqps (%rdi), %xmm0, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x float>
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <4 x float>
%2 = fcmp oeq <4 x float> %0, %1
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
define zeroext i32 @test_vcmpoeqps_v4i1_v32i1_mask_mem_b(<2 x i64> %__a, float* %__b) local_unnamed_addr {
; VLX-LABEL: test_vcmpoeqps_v4i1_v32i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: vcmpeqps (%rdi){1to4}, %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vcmpoeqps_v4i1_v32i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1814:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1815:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1816:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vbroadcastss (%rdi), %xmm1
; NoVLX-NEXT: vcmpeqps %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x float>
%load = load float, float* %__b
%vec = insertelement <4 x float> undef, float %load, i32 0
%1 = shufflevector <4 x float> %vec, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
%2 = fcmp oeq <4 x float> %0, %1
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
define zeroext i32 @test_masked_vcmpoeqps_v4i1_v32i1_mask(i4 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqps_v4i1_v32i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
; VLX-NEXT: kmovb -{{[0-9]+}}(%rsp), %k1
; VLX-NEXT: vcmpeqps %xmm1, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vcmpoeqps_v4i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1817:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1818:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1819:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: movb %dil, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vpmovqd %zmm2, %ymm2
; NoVLX-NEXT: vcmpeqps %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vandps %xmm2, %xmm0, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x float>
%1 = bitcast <2 x i64> %__b to <4 x float>
%2 = fcmp oeq <4 x float> %0, %1
%3 = bitcast i4 %__u to <4 x i1>
%4 = and <4 x i1> %2, %3
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
define zeroext i32 @test_masked_vcmpoeqps_v4i1_v32i1_mask_mem(i4 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqps_v4i1_v32i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
; VLX-NEXT: kmovb -{{[0-9]+}}(%rsp), %k1
; VLX-NEXT: vcmpeqps (%rsi), %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vcmpoeqps_v4i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1820:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1821:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1822:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: movb %dil, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpmovqd %zmm1, %ymm1
; NoVLX-NEXT: vcmpeqps (%rsi), %xmm0, %xmm0
; NoVLX-NEXT: vandps %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x float>
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <4 x float>
%2 = fcmp oeq <4 x float> %0, %1
%3 = bitcast i4 %__u to <4 x i1>
%4 = and <4 x i1> %2, %3
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
define zeroext i32 @test_masked_vcmpoeqps_v4i1_v32i1_mask_mem_b(i4 zeroext %__u, <2 x i64> %__a, float* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqps_v4i1_v32i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
; VLX-NEXT: kmovb -{{[0-9]+}}(%rsp), %k1
; VLX-NEXT: vcmpeqps (%rsi){1to4}, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vcmpoeqps_v4i1_v32i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1823:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1824:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1825:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: movb %dil, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpmovqd %zmm1, %ymm1
; NoVLX-NEXT: vbroadcastss (%rsi), %xmm2
; NoVLX-NEXT: vcmpeqps %xmm2, %xmm0, %xmm0
; NoVLX-NEXT: vandps %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x float>
%load = load float, float* %__b
%vec = insertelement <4 x float> undef, float %load, i32 0
%1 = shufflevector <4 x float> %vec, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
%2 = fcmp oeq <4 x float> %0, %1
%3 = bitcast i4 %__u to <4 x i1>
%4 = and <4 x i1> %2, %3
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
define zeroext i64 @test_vcmpoeqps_v4i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vcmpoeqps_v4i1_v64i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: vcmpeqps %xmm1, %xmm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vcmpoeqps_v4i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1826:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1827:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1828:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vcmpeqps %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x float>
%1 = bitcast <2 x i64> %__b to <4 x float>
%2 = fcmp oeq <4 x float> %0, %1
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
define zeroext i64 @test_vcmpoeqps_v4i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vcmpoeqps_v4i1_v64i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: vcmpeqps (%rdi), %xmm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vcmpoeqps_v4i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1829:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1830:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1831:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vcmpeqps (%rdi), %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x float>
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <4 x float>
%2 = fcmp oeq <4 x float> %0, %1
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
define zeroext i64 @test_vcmpoeqps_v4i1_v64i1_mask_mem_b(<2 x i64> %__a, float* %__b) local_unnamed_addr {
; VLX-LABEL: test_vcmpoeqps_v4i1_v64i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: vcmpeqps (%rdi){1to4}, %xmm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vcmpoeqps_v4i1_v64i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1832:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1833:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1834:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vbroadcastss (%rdi), %xmm1
; NoVLX-NEXT: vcmpeqps %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x float>
%load = load float, float* %__b
%vec = insertelement <4 x float> undef, float %load, i32 0
%1 = shufflevector <4 x float> %vec, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
%2 = fcmp oeq <4 x float> %0, %1
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
define zeroext i64 @test_masked_vcmpoeqps_v4i1_v64i1_mask(i4 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqps_v4i1_v64i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
; VLX-NEXT: kmovb -{{[0-9]+}}(%rsp), %k1
; VLX-NEXT: vcmpeqps %xmm1, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vcmpoeqps_v4i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1835:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1836:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1837:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $96, %rsp
; NoVLX-NEXT: movb %dil, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vpmovqd %zmm2, %ymm2
; NoVLX-NEXT: vcmpeqps %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vandps %xmm2, %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x float>
%1 = bitcast <2 x i64> %__b to <4 x float>
%2 = fcmp oeq <4 x float> %0, %1
%3 = bitcast i4 %__u to <4 x i1>
%4 = and <4 x i1> %2, %3
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
define zeroext i64 @test_masked_vcmpoeqps_v4i1_v64i1_mask_mem(i4 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqps_v4i1_v64i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
; VLX-NEXT: kmovb -{{[0-9]+}}(%rsp), %k1
; VLX-NEXT: vcmpeqps (%rsi), %xmm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vcmpoeqps_v4i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1838:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1839:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1840:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $96, %rsp
; NoVLX-NEXT: movb %dil, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpmovqd %zmm1, %ymm1
; NoVLX-NEXT: vcmpeqps (%rsi), %xmm0, %xmm0
; NoVLX-NEXT: vandps %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x float>
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <4 x float>
%2 = fcmp oeq <4 x float> %0, %1
%3 = bitcast i4 %__u to <4 x i1>
%4 = and <4 x i1> %2, %3
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
define zeroext i64 @test_masked_vcmpoeqps_v4i1_v64i1_mask_mem_b(i4 zeroext %__u, <2 x i64> %__a, float* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqps_v4i1_v64i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
; VLX-NEXT: kmovb -{{[0-9]+}}(%rsp), %k1
; VLX-NEXT: vcmpeqps (%rsi){1to4}, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vcmpoeqps_v4i1_v64i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1841:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1842:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1843:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $96, %rsp
; NoVLX-NEXT: movb %dil, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpmovqd %zmm1, %ymm1
; NoVLX-NEXT: vbroadcastss (%rsi), %xmm2
; NoVLX-NEXT: vcmpeqps %xmm2, %xmm0, %xmm0
; NoVLX-NEXT: vandps %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x float>
%load = load float, float* %__b
%vec = insertelement <4 x float> undef, float %load, i32 0
%1 = shufflevector <4 x float> %vec, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
%2 = fcmp oeq <4 x float> %0, %1
%3 = bitcast i4 %__u to <4 x i1>
%4 = and <4 x i1> %2, %3
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
define zeroext i16 @test_vcmpoeqps_v8i1_v16i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vcmpoeqps_v8i1_v16i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: vcmpeqps %ymm1, %ymm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vcmpoeqps_v8i1_v16i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: # kill: %YMM1<def> %YMM1<kill> %ZMM1<def>
; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $8, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x float>
%1 = bitcast <4 x i64> %__b to <8 x float>
%2 = fcmp oeq <8 x float> %0, %1
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%4 = bitcast <16 x i1> %3 to i16
ret i16 %4
}
define zeroext i16 @test_vcmpoeqps_v8i1_v16i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vcmpoeqps_v8i1_v16i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: vcmpeqps (%rdi), %ymm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vcmpoeqps_v8i1_v16i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; NoVLX-NEXT: vmovaps (%rdi), %ymm1
; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $8, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x float>
%load = load <4 x i64>, <4 x i64>* %__b
%1 = bitcast <4 x i64> %load to <8 x float>
%2 = fcmp oeq <8 x float> %0, %1
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%4 = bitcast <16 x i1> %3 to i16
ret i16 %4
}
define zeroext i16 @test_vcmpoeqps_v8i1_v16i1_mask_mem_b(<4 x i64> %__a, float* %__b) local_unnamed_addr {
; VLX-LABEL: test_vcmpoeqps_v8i1_v16i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: vcmpeqps (%rdi){1to8}, %ymm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vcmpoeqps_v8i1_v16i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; NoVLX-NEXT: vbroadcastss (%rdi), %ymm1
; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $8, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x float>
%load = load float, float* %__b
%vec = insertelement <8 x float> undef, float %load, i32 0
%1 = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
%2 = fcmp oeq <8 x float> %0, %1
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%4 = bitcast <16 x i1> %3 to i16
ret i16 %4
}
define zeroext i16 @test_masked_vcmpoeqps_v8i1_v16i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqps_v8i1_v16i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vcmpeqps %ymm1, %ymm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vcmpoeqps_v8i1_v16i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: # kill: %YMM1<def> %YMM1<kill> %ZMM1<def>
; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $8, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x float>
%1 = bitcast <4 x i64> %__b to <8 x float>
%2 = fcmp oeq <8 x float> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %2, %3
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%6 = bitcast <16 x i1> %5 to i16
ret i16 %6
}
define zeroext i16 @test_masked_vcmpoeqps_v8i1_v16i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqps_v8i1_v16i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vcmpeqps (%rsi), %ymm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vcmpoeqps_v8i1_v16i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; NoVLX-NEXT: vmovaps (%rsi), %ymm1
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $8, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x float>
%load = load <4 x i64>, <4 x i64>* %__b
%1 = bitcast <4 x i64> %load to <8 x float>
%2 = fcmp oeq <8 x float> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %2, %3
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%6 = bitcast <16 x i1> %5 to i16
ret i16 %6
}
define zeroext i16 @test_masked_vcmpoeqps_v8i1_v16i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, float* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqps_v8i1_v16i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vcmpeqps (%rsi){1to8}, %ymm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vcmpoeqps_v8i1_v16i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; NoVLX-NEXT: vbroadcastss (%rsi), %ymm1
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $8, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x float>
%load = load float, float* %__b
%vec = insertelement <8 x float> undef, float %load, i32 0
%1 = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
%2 = fcmp oeq <8 x float> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %2, %3
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%6 = bitcast <16 x i1> %5 to i16
ret i16 %6
}
define zeroext i32 @test_vcmpoeqps_v8i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vcmpoeqps_v8i1_v32i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: vcmpeqps %ymm1, %ymm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vcmpoeqps_v8i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1844:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1845:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1846:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: # kill: %YMM1<def> %YMM1<kill> %ZMM1<def>
; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vxorps %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x float>
%1 = bitcast <4 x i64> %__b to <8 x float>
%2 = fcmp oeq <8 x float> %0, %1
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
define zeroext i32 @test_vcmpoeqps_v8i1_v32i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vcmpoeqps_v8i1_v32i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: vcmpeqps (%rdi), %ymm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vcmpoeqps_v8i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1847:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1848:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1849:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; NoVLX-NEXT: vmovaps (%rdi), %ymm1
; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vxorps %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x float>
%load = load <4 x i64>, <4 x i64>* %__b
%1 = bitcast <4 x i64> %load to <8 x float>
%2 = fcmp oeq <8 x float> %0, %1
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
define zeroext i32 @test_vcmpoeqps_v8i1_v32i1_mask_mem_b(<4 x i64> %__a, float* %__b) local_unnamed_addr {
; VLX-LABEL: test_vcmpoeqps_v8i1_v32i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: vcmpeqps (%rdi){1to8}, %ymm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vcmpoeqps_v8i1_v32i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1850:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1851:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1852:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; NoVLX-NEXT: vbroadcastss (%rdi), %ymm1
; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vxorps %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x float>
%load = load float, float* %__b
%vec = insertelement <8 x float> undef, float %load, i32 0
%1 = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
%2 = fcmp oeq <8 x float> %0, %1
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
define zeroext i32 @test_masked_vcmpoeqps_v8i1_v32i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqps_v8i1_v32i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vcmpeqps %ymm1, %ymm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vcmpoeqps_v8i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1853:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1854:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1855:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: # kill: %YMM1<def> %YMM1<kill> %ZMM1<def>
; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vxorps %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x float>
%1 = bitcast <4 x i64> %__b to <8 x float>
%2 = fcmp oeq <8 x float> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %2, %3
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
define zeroext i32 @test_masked_vcmpoeqps_v8i1_v32i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqps_v8i1_v32i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vcmpeqps (%rsi), %ymm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vcmpoeqps_v8i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1856:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1857:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1858:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; NoVLX-NEXT: vmovaps (%rsi), %ymm1
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vxorps %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x float>
%load = load <4 x i64>, <4 x i64>* %__b
%1 = bitcast <4 x i64> %load to <8 x float>
%2 = fcmp oeq <8 x float> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %2, %3
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
define zeroext i32 @test_masked_vcmpoeqps_v8i1_v32i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, float* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqps_v8i1_v32i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vcmpeqps (%rsi){1to8}, %ymm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vcmpoeqps_v8i1_v32i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1859:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1860:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1861:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; NoVLX-NEXT: vbroadcastss (%rsi), %ymm1
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vxorps %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x float>
%load = load float, float* %__b
%vec = insertelement <8 x float> undef, float %load, i32 0
%1 = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
%2 = fcmp oeq <8 x float> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %2, %3
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
define zeroext i64 @test_vcmpoeqps_v8i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vcmpoeqps_v8i1_v64i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: vcmpeqps %ymm1, %ymm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vcmpoeqps_v8i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1862:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1863:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1864:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: # kill: %YMM1<def> %YMM1<kill> %ZMM1<def>
; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vxorps %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x float>
%1 = bitcast <4 x i64> %__b to <8 x float>
%2 = fcmp oeq <8 x float> %0, %1
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
define zeroext i64 @test_vcmpoeqps_v8i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vcmpoeqps_v8i1_v64i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: vcmpeqps (%rdi), %ymm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vcmpoeqps_v8i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1865:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1866:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1867:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; NoVLX-NEXT: vmovaps (%rdi), %ymm1
; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vxorps %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x float>
%load = load <4 x i64>, <4 x i64>* %__b
%1 = bitcast <4 x i64> %load to <8 x float>
%2 = fcmp oeq <8 x float> %0, %1
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
define zeroext i64 @test_vcmpoeqps_v8i1_v64i1_mask_mem_b(<4 x i64> %__a, float* %__b) local_unnamed_addr {
; VLX-LABEL: test_vcmpoeqps_v8i1_v64i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: vcmpeqps (%rdi){1to8}, %ymm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vcmpoeqps_v8i1_v64i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1868:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1869:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1870:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; NoVLX-NEXT: vbroadcastss (%rdi), %ymm1
; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vxorps %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x float>
%load = load float, float* %__b
%vec = insertelement <8 x float> undef, float %load, i32 0
%1 = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
%2 = fcmp oeq <8 x float> %0, %1
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
define zeroext i64 @test_masked_vcmpoeqps_v8i1_v64i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqps_v8i1_v64i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vcmpeqps %ymm1, %ymm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vcmpoeqps_v8i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1871:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1872:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1873:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: # kill: %YMM1<def> %YMM1<kill> %ZMM1<def>
; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vxorps %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x float>
%1 = bitcast <4 x i64> %__b to <8 x float>
%2 = fcmp oeq <8 x float> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %2, %3
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
define zeroext i64 @test_masked_vcmpoeqps_v8i1_v64i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqps_v8i1_v64i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vcmpeqps (%rsi), %ymm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vcmpoeqps_v8i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1874:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1875:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1876:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; NoVLX-NEXT: vmovaps (%rsi), %ymm1
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vxorps %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x float>
%load = load <4 x i64>, <4 x i64>* %__b
%1 = bitcast <4 x i64> %load to <8 x float>
%2 = fcmp oeq <8 x float> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %2, %3
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
define zeroext i64 @test_masked_vcmpoeqps_v8i1_v64i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, float* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqps_v8i1_v64i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vcmpeqps (%rsi){1to8}, %ymm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vcmpoeqps_v8i1_v64i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1877:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1878:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1879:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; NoVLX-NEXT: vbroadcastss (%rsi), %ymm1
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vxorps %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x float>
%load = load float, float* %__b
%vec = insertelement <8 x float> undef, float %load, i32 0
%1 = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
%2 = fcmp oeq <8 x float> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %2, %3
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
define zeroext i32 @test_vcmpoeqps_v16i1_v32i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vcmpoeqps_v16i1_v32i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vcmpoeqps_v16i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1880:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1881:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1882:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %r13
; NoVLX-NEXT: pushq %r12
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: .Lcfi1883:
; NoVLX-NEXT: .cfi_offset %rbx, -56
; NoVLX-NEXT: .Lcfi1884:
; NoVLX-NEXT: .cfi_offset %r12, -48
; NoVLX-NEXT: .Lcfi1885:
; NoVLX-NEXT: .cfi_offset %r13, -40
; NoVLX-NEXT: .Lcfi1886:
; NoVLX-NEXT: .cfi_offset %r14, -32
; NoVLX-NEXT: .Lcfi1887:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: leaq -40(%rbp), %rsp
; NoVLX-NEXT: popq %rbx
; NoVLX-NEXT: popq %r12
; NoVLX-NEXT: popq %r13
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x float>
%1 = bitcast <8 x i64> %__b to <16 x float>
%2 = fcmp oeq <16 x float> %0, %1
%3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
define zeroext i32 @test_vcmpoeqps_v16i1_v32i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vcmpoeqps_v16i1_v32i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: vcmpeqps (%rdi), %zmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vcmpoeqps_v16i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1888:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1889:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1890:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %r13
; NoVLX-NEXT: pushq %r12
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: .Lcfi1891:
; NoVLX-NEXT: .cfi_offset %rbx, -56
; NoVLX-NEXT: .Lcfi1892:
; NoVLX-NEXT: .cfi_offset %r12, -48
; NoVLX-NEXT: .Lcfi1893:
; NoVLX-NEXT: .cfi_offset %r13, -40
; NoVLX-NEXT: .Lcfi1894:
; NoVLX-NEXT: .cfi_offset %r14, -32
; NoVLX-NEXT: .Lcfi1895:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vcmpeqps (%rdi), %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: leaq -40(%rbp), %rsp
; NoVLX-NEXT: popq %rbx
; NoVLX-NEXT: popq %r12
; NoVLX-NEXT: popq %r13
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x float>
%load = load <8 x i64>, <8 x i64>* %__b
%1 = bitcast <8 x i64> %load to <16 x float>
%2 = fcmp oeq <16 x float> %0, %1
%3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
define zeroext i32 @test_vcmpoeqps_v16i1_v32i1_mask_mem_b(<8 x i64> %__a, float* %__b) local_unnamed_addr {
; VLX-LABEL: test_vcmpoeqps_v16i1_v32i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: vcmpeqps (%rdi){1to16}, %zmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vcmpoeqps_v16i1_v32i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1896:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1897:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1898:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %r13
; NoVLX-NEXT: pushq %r12
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: .Lcfi1899:
; NoVLX-NEXT: .cfi_offset %rbx, -56
; NoVLX-NEXT: .Lcfi1900:
; NoVLX-NEXT: .cfi_offset %r12, -48
; NoVLX-NEXT: .Lcfi1901:
; NoVLX-NEXT: .cfi_offset %r13, -40
; NoVLX-NEXT: .Lcfi1902:
; NoVLX-NEXT: .cfi_offset %r14, -32
; NoVLX-NEXT: .Lcfi1903:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vcmpeqps (%rdi){1to16}, %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: leaq -40(%rbp), %rsp
; NoVLX-NEXT: popq %rbx
; NoVLX-NEXT: popq %r12
; NoVLX-NEXT: popq %r13
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x float>
%load = load float, float* %__b
%vec = insertelement <16 x float> undef, float %load, i32 0
%1 = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
%2 = fcmp oeq <16 x float> %0, %1
%3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
define zeroext i32 @test_masked_vcmpoeqps_v16i1_v32i1_mask(i16 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqps_v16i1_v32i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vcmpoeqps_v16i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1904:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1905:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1906:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %r13
; NoVLX-NEXT: pushq %r12
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: .Lcfi1907:
; NoVLX-NEXT: .cfi_offset %rbx, -56
; NoVLX-NEXT: .Lcfi1908:
; NoVLX-NEXT: .cfi_offset %r12, -48
; NoVLX-NEXT: .Lcfi1909:
; NoVLX-NEXT: .cfi_offset %r13, -40
; NoVLX-NEXT: .Lcfi1910:
; NoVLX-NEXT: .cfi_offset %r14, -32
; NoVLX-NEXT: .Lcfi1911:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: leaq -40(%rbp), %rsp
; NoVLX-NEXT: popq %rbx
; NoVLX-NEXT: popq %r12
; NoVLX-NEXT: popq %r13
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x float>
%1 = bitcast <8 x i64> %__b to <16 x float>
%2 = fcmp oeq <16 x float> %0, %1
%3 = bitcast i16 %__u to <16 x i1>
%4 = and <16 x i1> %2, %3
%5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
define zeroext i32 @test_masked_vcmpoeqps_v16i1_v32i1_mask_mem(i16 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqps_v16i1_v32i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vcmpeqps (%rsi), %zmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vcmpoeqps_v16i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1912:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1913:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1914:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %r13
; NoVLX-NEXT: pushq %r12
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: .Lcfi1915:
; NoVLX-NEXT: .cfi_offset %rbx, -56
; NoVLX-NEXT: .Lcfi1916:
; NoVLX-NEXT: .cfi_offset %r12, -48
; NoVLX-NEXT: .Lcfi1917:
; NoVLX-NEXT: .cfi_offset %r13, -40
; NoVLX-NEXT: .Lcfi1918:
; NoVLX-NEXT: .cfi_offset %r14, -32
; NoVLX-NEXT: .Lcfi1919:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vcmpeqps (%rsi), %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: leaq -40(%rbp), %rsp
; NoVLX-NEXT: popq %rbx
; NoVLX-NEXT: popq %r12
; NoVLX-NEXT: popq %r13
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x float>
%load = load <8 x i64>, <8 x i64>* %__b
%1 = bitcast <8 x i64> %load to <16 x float>
%2 = fcmp oeq <16 x float> %0, %1
%3 = bitcast i16 %__u to <16 x i1>
%4 = and <16 x i1> %2, %3
%5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
define zeroext i32 @test_masked_vcmpoeqps_v16i1_v32i1_mask_mem_b(i16 zeroext %__u, <8 x i64> %__a, float* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqps_v16i1_v32i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vcmpeqps (%rsi){1to16}, %zmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vcmpoeqps_v16i1_v32i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1920:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1921:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1922:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %r13
; NoVLX-NEXT: pushq %r12
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: .Lcfi1923:
; NoVLX-NEXT: .cfi_offset %rbx, -56
; NoVLX-NEXT: .Lcfi1924:
; NoVLX-NEXT: .cfi_offset %r12, -48
; NoVLX-NEXT: .Lcfi1925:
; NoVLX-NEXT: .cfi_offset %r13, -40
; NoVLX-NEXT: .Lcfi1926:
; NoVLX-NEXT: .cfi_offset %r14, -32
; NoVLX-NEXT: .Lcfi1927:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vcmpeqps (%rsi){1to16}, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: leaq -40(%rbp), %rsp
; NoVLX-NEXT: popq %rbx
; NoVLX-NEXT: popq %r12
; NoVLX-NEXT: popq %r13
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x float>
%load = load float, float* %__b
%vec = insertelement <16 x float> undef, float %load, i32 0
%1 = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
%2 = fcmp oeq <16 x float> %0, %1
%3 = bitcast i16 %__u to <16 x i1>
%4 = and <16 x i1> %2, %3
%5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
define zeroext i32 @test_vcmpoeqps_v16i1_v32i1_sae_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
; CHECK-LABEL: test_vcmpoeqps_v16i1_v32i1_sae_mask:
; CHECK: # BB#0: # %entry
; CHECK-NEXT: vcmpleps {sae}, %zmm1, %zmm0, %k0
; CHECK-NEXT: kmovw %k0, %eax
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x float>
%1 = bitcast <8 x i64> %__b to <16 x float>
%2 = call i16 @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %0, <16 x float> %1, i32 2, i16 -1, i32 8)
%3 = zext i16 %2 to i32
ret i32 %3
}
define zeroext i32 @test_masked_vcmpoeqps_v16i1_v32i1_sae_mask(i16 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqps_v16i1_v32i1_sae_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vcmpleps {sae}, %zmm1, %zmm0, %k0 {%k1}
; VLX-NEXT: kmovw %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vcmpoeqps_v16i1_v32i1_sae_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vcmpleps {sae}, %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x float>
%1 = bitcast <8 x i64> %__b to <16 x float>
%2 = call i16 @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %0, <16 x float> %1, i32 2, i16 %__u, i32 8)
%3 = zext i16 %2 to i32
ret i32 %3
}
define zeroext i64 @test_vcmpoeqps_v16i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vcmpoeqps_v16i1_v64i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vcmpoeqps_v16i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1928:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1929:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1930:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %r13
; NoVLX-NEXT: pushq %r12
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: .Lcfi1931:
; NoVLX-NEXT: .cfi_offset %rbx, -56
; NoVLX-NEXT: .Lcfi1932:
; NoVLX-NEXT: .cfi_offset %r12, -48
; NoVLX-NEXT: .Lcfi1933:
; NoVLX-NEXT: .cfi_offset %r13, -40
; NoVLX-NEXT: .Lcfi1934:
; NoVLX-NEXT: .cfi_offset %r14, -32
; NoVLX-NEXT: .Lcfi1935:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: leaq -40(%rbp), %rsp
; NoVLX-NEXT: popq %rbx
; NoVLX-NEXT: popq %r12
; NoVLX-NEXT: popq %r13
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x float>
%1 = bitcast <8 x i64> %__b to <16 x float>
%2 = fcmp oeq <16 x float> %0, %1
%3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
define zeroext i64 @test_vcmpoeqps_v16i1_v64i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vcmpoeqps_v16i1_v64i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: vcmpeqps (%rdi), %zmm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vcmpoeqps_v16i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1936:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1937:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1938:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %r13
; NoVLX-NEXT: pushq %r12
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: .Lcfi1939:
; NoVLX-NEXT: .cfi_offset %rbx, -56
; NoVLX-NEXT: .Lcfi1940:
; NoVLX-NEXT: .cfi_offset %r12, -48
; NoVLX-NEXT: .Lcfi1941:
; NoVLX-NEXT: .cfi_offset %r13, -40
; NoVLX-NEXT: .Lcfi1942:
; NoVLX-NEXT: .cfi_offset %r14, -32
; NoVLX-NEXT: .Lcfi1943:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vcmpeqps (%rdi), %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: leaq -40(%rbp), %rsp
; NoVLX-NEXT: popq %rbx
; NoVLX-NEXT: popq %r12
; NoVLX-NEXT: popq %r13
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x float>
%load = load <8 x i64>, <8 x i64>* %__b
%1 = bitcast <8 x i64> %load to <16 x float>
%2 = fcmp oeq <16 x float> %0, %1
%3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
define zeroext i64 @test_vcmpoeqps_v16i1_v64i1_mask_mem_b(<8 x i64> %__a, float* %__b) local_unnamed_addr {
; VLX-LABEL: test_vcmpoeqps_v16i1_v64i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: vcmpeqps (%rdi){1to16}, %zmm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vcmpoeqps_v16i1_v64i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1944:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1945:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1946:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %r13
; NoVLX-NEXT: pushq %r12
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: .Lcfi1947:
; NoVLX-NEXT: .cfi_offset %rbx, -56
; NoVLX-NEXT: .Lcfi1948:
; NoVLX-NEXT: .cfi_offset %r12, -48
; NoVLX-NEXT: .Lcfi1949:
; NoVLX-NEXT: .cfi_offset %r13, -40
; NoVLX-NEXT: .Lcfi1950:
; NoVLX-NEXT: .cfi_offset %r14, -32
; NoVLX-NEXT: .Lcfi1951:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vcmpeqps (%rdi){1to16}, %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: leaq -40(%rbp), %rsp
; NoVLX-NEXT: popq %rbx
; NoVLX-NEXT: popq %r12
; NoVLX-NEXT: popq %r13
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x float>
%load = load float, float* %__b
%vec = insertelement <16 x float> undef, float %load, i32 0
%1 = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
%2 = fcmp oeq <16 x float> %0, %1
%3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
define zeroext i64 @test_masked_vcmpoeqps_v16i1_v64i1_mask(i16 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqps_v16i1_v64i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vcmpoeqps_v16i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1952:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1953:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1954:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %r13
; NoVLX-NEXT: pushq %r12
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: .Lcfi1955:
; NoVLX-NEXT: .cfi_offset %rbx, -56
; NoVLX-NEXT: .Lcfi1956:
; NoVLX-NEXT: .cfi_offset %r12, -48
; NoVLX-NEXT: .Lcfi1957:
; NoVLX-NEXT: .cfi_offset %r13, -40
; NoVLX-NEXT: .Lcfi1958:
; NoVLX-NEXT: .cfi_offset %r14, -32
; NoVLX-NEXT: .Lcfi1959:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: leaq -40(%rbp), %rsp
; NoVLX-NEXT: popq %rbx
; NoVLX-NEXT: popq %r12
; NoVLX-NEXT: popq %r13
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x float>
%1 = bitcast <8 x i64> %__b to <16 x float>
%2 = fcmp oeq <16 x float> %0, %1
%3 = bitcast i16 %__u to <16 x i1>
%4 = and <16 x i1> %2, %3
%5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
define zeroext i64 @test_masked_vcmpoeqps_v16i1_v64i1_mask_mem(i16 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqps_v16i1_v64i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vcmpeqps (%rsi), %zmm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vcmpoeqps_v16i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1960:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1961:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1962:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %r13
; NoVLX-NEXT: pushq %r12
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: .Lcfi1963:
; NoVLX-NEXT: .cfi_offset %rbx, -56
; NoVLX-NEXT: .Lcfi1964:
; NoVLX-NEXT: .cfi_offset %r12, -48
; NoVLX-NEXT: .Lcfi1965:
; NoVLX-NEXT: .cfi_offset %r13, -40
; NoVLX-NEXT: .Lcfi1966:
; NoVLX-NEXT: .cfi_offset %r14, -32
; NoVLX-NEXT: .Lcfi1967:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vcmpeqps (%rsi), %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: leaq -40(%rbp), %rsp
; NoVLX-NEXT: popq %rbx
; NoVLX-NEXT: popq %r12
; NoVLX-NEXT: popq %r13
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x float>
%load = load <8 x i64>, <8 x i64>* %__b
%1 = bitcast <8 x i64> %load to <16 x float>
%2 = fcmp oeq <16 x float> %0, %1
%3 = bitcast i16 %__u to <16 x i1>
%4 = and <16 x i1> %2, %3
%5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
define zeroext i64 @test_masked_vcmpoeqps_v16i1_v64i1_mask_mem_b(i16 zeroext %__u, <8 x i64> %__a, float* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqps_v16i1_v64i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vcmpeqps (%rsi){1to16}, %zmm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vcmpoeqps_v16i1_v64i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1968:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1969:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1970:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
; NoVLX-NEXT: pushq %r13
; NoVLX-NEXT: pushq %r12
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: .Lcfi1971:
; NoVLX-NEXT: .cfi_offset %rbx, -56
; NoVLX-NEXT: .Lcfi1972:
; NoVLX-NEXT: .cfi_offset %r12, -48
; NoVLX-NEXT: .Lcfi1973:
; NoVLX-NEXT: .cfi_offset %r13, -40
; NoVLX-NEXT: .Lcfi1974:
; NoVLX-NEXT: .cfi_offset %r14, -32
; NoVLX-NEXT: .Lcfi1975:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vcmpeqps (%rsi){1to16}, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: leaq -40(%rbp), %rsp
; NoVLX-NEXT: popq %rbx
; NoVLX-NEXT: popq %r12
; NoVLX-NEXT: popq %r13
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x float>
%load = load float, float* %__b
%vec = insertelement <16 x float> undef, float %load, i32 0
%1 = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
%2 = fcmp oeq <16 x float> %0, %1
%3 = bitcast i16 %__u to <16 x i1>
%4 = and <16 x i1> %2, %3
%5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
define zeroext i64 @test_vcmpoeqps_v16i1_v64i1_sae_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vcmpoeqps_v16i1_v64i1_sae_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: vcmpleps {sae}, %zmm1, %zmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: movzwl %ax, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vcmpoeqps_v16i1_v64i1_sae_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vcmpleps {sae}, %zmm1, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x float>
%1 = bitcast <8 x i64> %__b to <16 x float>
%2 = call i16 @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %0, <16 x float> %1, i32 2, i16 -1, i32 8)
%3 = zext i16 %2 to i64
ret i64 %3
}
define zeroext i64 @test_masked_vcmpoeqps_v16i1_v64i1_sae_mask(i16 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqps_v16i1_v64i1_sae_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vcmpleps {sae}, %zmm1, %zmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: movzwl %ax, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vcmpoeqps_v16i1_v64i1_sae_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vcmpleps {sae}, %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x float>
%1 = bitcast <8 x i64> %__b to <16 x float>
%2 = call i16 @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %0, <16 x float> %1, i32 2, i16 %__u, i32 8)
%3 = zext i16 %2 to i64
ret i64 %3
}
declare i8 @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> , <8 x double> , i32, i8, i32)
define zeroext i4 @test_vcmpoeqpd_v2i1_v4i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vcmpoeqpd_v2i1_v4i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: vcmpeqpd %xmm1, %xmm0, %k0
; VLX-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp)
; VLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v4i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x double>
%1 = bitcast <2 x i64> %__b to <2 x double>
%2 = fcmp oeq <2 x double> %0, %1
%3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%4 = bitcast <4 x i1> %3 to i4
ret i4 %4
}
define zeroext i4 @test_vcmpoeqpd_v2i1_v4i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vcmpoeqpd_v2i1_v4i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: vcmpeqpd (%rdi), %xmm0, %k0
; VLX-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp)
; VLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v4i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vcmpeqpd (%rdi), %xmm0, %xmm0
; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x double>
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <2 x double>
%2 = fcmp oeq <2 x double> %0, %1
%3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%4 = bitcast <4 x i1> %3 to i4
ret i4 %4
}
define zeroext i4 @test_vcmpoeqpd_v2i1_v4i1_mask_mem_b(<2 x i64> %__a, double* %__b) local_unnamed_addr {
; VLX-LABEL: test_vcmpoeqpd_v2i1_v4i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: vcmpeqpd (%rdi){1to2}, %xmm0, %k0
; VLX-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp)
; VLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v4i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0]
; NoVLX-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x double>
%load = load double, double* %__b
%vec = insertelement <2 x double> undef, double %load, i32 0
%1 = shufflevector <2 x double> %vec, <2 x double> undef, <2 x i32> <i32 0, i32 0>
%2 = fcmp oeq <2 x double> %0, %1
%3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%4 = bitcast <4 x i1> %3 to i4
ret i4 %4
}
define zeroext i4 @test_masked_vcmpoeqpd_v2i1_v4i1_mask(i2 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqpd_v2i1_v4i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
; VLX-NEXT: kmovb -{{[0-9]+}}(%rsp), %k1
; VLX-NEXT: vcmpeqpd %xmm1, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp)
; VLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v4i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vandpd %xmm2, %xmm0, %xmm0
; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x double>
%1 = bitcast <2 x i64> %__b to <2 x double>
%2 = fcmp oeq <2 x double> %0, %1
%3 = bitcast i2 %__u to <2 x i1>
%4 = and <2 x i1> %2, %3
%5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%6 = bitcast <4 x i1> %5 to i4
ret i4 %6
}
define zeroext i4 @test_masked_vcmpoeqpd_v2i1_v4i1_mask_mem(i2 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqpd_v2i1_v4i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
; VLX-NEXT: kmovb -{{[0-9]+}}(%rsp), %k1
; VLX-NEXT: vcmpeqpd (%rsi), %xmm0, %k0 {%k1}
; VLX-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp)
; VLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v4i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vcmpeqpd (%rsi), %xmm0, %xmm0
; NoVLX-NEXT: vandpd %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x double>
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <2 x double>
%2 = fcmp oeq <2 x double> %0, %1
%3 = bitcast i2 %__u to <2 x i1>
%4 = and <2 x i1> %2, %3
%5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%6 = bitcast <4 x i1> %5 to i4
ret i4 %6
}
define zeroext i4 @test_masked_vcmpoeqpd_v2i1_v4i1_mask_mem_b(i2 zeroext %__u, <2 x i64> %__a, double* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqpd_v2i1_v4i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
; VLX-NEXT: kmovb -{{[0-9]+}}(%rsp), %k1
; VLX-NEXT: vcmpeqpd (%rsi){1to2}, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp)
; VLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v4i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vmovddup {{.*#+}} xmm2 = mem[0,0]
; NoVLX-NEXT: vcmpeqpd %xmm2, %xmm0, %xmm0
; NoVLX-NEXT: vandpd %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x double>
%load = load double, double* %__b
%vec = insertelement <2 x double> undef, double %load, i32 0
%1 = shufflevector <2 x double> %vec, <2 x double> undef, <2 x i32> <i32 0, i32 0>
%2 = fcmp oeq <2 x double> %0, %1
%3 = bitcast i2 %__u to <2 x i1>
%4 = and <2 x i1> %2, %3
%5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%6 = bitcast <4 x i1> %5 to i4
ret i4 %6
}
define zeroext i8 @test_vcmpoeqpd_v2i1_v8i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vcmpoeqpd_v2i1_v8i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: vcmpeqpd %xmm1, %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v8i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,8,2,3,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x double>
%1 = bitcast <2 x i64> %__b to <2 x double>
%2 = fcmp oeq <2 x double> %0, %1
%3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%4 = bitcast <8 x i1> %3 to i8
ret i8 %4
}
define zeroext i8 @test_vcmpoeqpd_v2i1_v8i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vcmpoeqpd_v2i1_v8i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: vcmpeqpd (%rdi), %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v8i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vcmpeqpd (%rdi), %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,8,2,3,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x double>
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <2 x double>
%2 = fcmp oeq <2 x double> %0, %1
%3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%4 = bitcast <8 x i1> %3 to i8
ret i8 %4
}
define zeroext i8 @test_vcmpoeqpd_v2i1_v8i1_mask_mem_b(<2 x i64> %__a, double* %__b) local_unnamed_addr {
; VLX-LABEL: test_vcmpoeqpd_v2i1_v8i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: vcmpeqpd (%rdi){1to2}, %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v8i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0]
; NoVLX-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,8,2,3,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x double>
%load = load double, double* %__b
%vec = insertelement <2 x double> undef, double %load, i32 0
%1 = shufflevector <2 x double> %vec, <2 x double> undef, <2 x i32> <i32 0, i32 0>
%2 = fcmp oeq <2 x double> %0, %1
%3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%4 = bitcast <8 x i1> %3 to i8
ret i8 %4
}
define zeroext i8 @test_masked_vcmpoeqpd_v2i1_v8i1_mask(i2 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqpd_v2i1_v8i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
; VLX-NEXT: kmovb -{{[0-9]+}}(%rsp), %k1
; VLX-NEXT: vcmpeqpd %xmm1, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v8i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vandpd %xmm2, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,8,2,3,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x double>
%1 = bitcast <2 x i64> %__b to <2 x double>
%2 = fcmp oeq <2 x double> %0, %1
%3 = bitcast i2 %__u to <2 x i1>
%4 = and <2 x i1> %2, %3
%5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%6 = bitcast <8 x i1> %5 to i8
ret i8 %6
}
define zeroext i8 @test_masked_vcmpoeqpd_v2i1_v8i1_mask_mem(i2 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqpd_v2i1_v8i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
; VLX-NEXT: kmovb -{{[0-9]+}}(%rsp), %k1
; VLX-NEXT: vcmpeqpd (%rsi), %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v8i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vcmpeqpd (%rsi), %xmm0, %xmm0
; NoVLX-NEXT: vandpd %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,8,2,3,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x double>
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <2 x double>
%2 = fcmp oeq <2 x double> %0, %1
%3 = bitcast i2 %__u to <2 x i1>
%4 = and <2 x i1> %2, %3
%5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%6 = bitcast <8 x i1> %5 to i8
ret i8 %6
}
define zeroext i8 @test_masked_vcmpoeqpd_v2i1_v8i1_mask_mem_b(i2 zeroext %__u, <2 x i64> %__a, double* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqpd_v2i1_v8i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
; VLX-NEXT: kmovb -{{[0-9]+}}(%rsp), %k1
; VLX-NEXT: vcmpeqpd (%rsi){1to2}, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v8i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vmovddup {{.*#+}} xmm2 = mem[0,0]
; NoVLX-NEXT: vcmpeqpd %xmm2, %xmm0, %xmm0
; NoVLX-NEXT: vandpd %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,8,2,3,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x double>
%load = load double, double* %__b
%vec = insertelement <2 x double> undef, double %load, i32 0
%1 = shufflevector <2 x double> %vec, <2 x double> undef, <2 x i32> <i32 0, i32 0>
%2 = fcmp oeq <2 x double> %0, %1
%3 = bitcast i2 %__u to <2 x i1>
%4 = and <2 x i1> %2, %3
%5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%6 = bitcast <8 x i1> %5 to i8
ret i8 %6
}
define zeroext i16 @test_vcmpoeqpd_v2i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vcmpoeqpd_v2i1_v16i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: vcmpeqpd %xmm1, %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v16i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm1, %zmm0, %zmm2
; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x double>
%1 = bitcast <2 x i64> %__b to <2 x double>
%2 = fcmp oeq <2 x double> %0, %1
%3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%4 = bitcast <16 x i1> %3 to i16
ret i16 %4
}
define zeroext i16 @test_vcmpoeqpd_v2i1_v16i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vcmpoeqpd_v2i1_v16i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: vcmpeqpd (%rdi), %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v16i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vcmpeqpd (%rdi), %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm1, %zmm0, %zmm2
; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x double>
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <2 x double>
%2 = fcmp oeq <2 x double> %0, %1
%3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%4 = bitcast <16 x i1> %3 to i16
ret i16 %4
}
define zeroext i16 @test_vcmpoeqpd_v2i1_v16i1_mask_mem_b(<2 x i64> %__a, double* %__b) local_unnamed_addr {
; VLX-LABEL: test_vcmpoeqpd_v2i1_v16i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: vcmpeqpd (%rdi){1to2}, %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v16i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0]
; NoVLX-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm1, %zmm0, %zmm2
; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x double>
%load = load double, double* %__b
%vec = insertelement <2 x double> undef, double %load, i32 0
%1 = shufflevector <2 x double> %vec, <2 x double> undef, <2 x i32> <i32 0, i32 0>
%2 = fcmp oeq <2 x double> %0, %1
%3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%4 = bitcast <16 x i1> %3 to i16
ret i16 %4
}
define zeroext i16 @test_masked_vcmpoeqpd_v2i1_v16i1_mask(i2 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqpd_v2i1_v16i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
; VLX-NEXT: kmovb -{{[0-9]+}}(%rsp), %k1
; VLX-NEXT: vcmpeqpd %xmm1, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v16i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vandpd %xmm2, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm1, %zmm0, %zmm2
; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x double>
%1 = bitcast <2 x i64> %__b to <2 x double>
%2 = fcmp oeq <2 x double> %0, %1
%3 = bitcast i2 %__u to <2 x i1>
%4 = and <2 x i1> %2, %3
%5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%6 = bitcast <16 x i1> %5 to i16
ret i16 %6
}
define zeroext i16 @test_masked_vcmpoeqpd_v2i1_v16i1_mask_mem(i2 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqpd_v2i1_v16i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
; VLX-NEXT: kmovb -{{[0-9]+}}(%rsp), %k1
; VLX-NEXT: vcmpeqpd (%rsi), %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v16i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vcmpeqpd (%rsi), %xmm0, %xmm0
; NoVLX-NEXT: vandpd %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm1, %zmm0, %zmm2
; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x double>
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <2 x double>
%2 = fcmp oeq <2 x double> %0, %1
%3 = bitcast i2 %__u to <2 x i1>
%4 = and <2 x i1> %2, %3
%5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%6 = bitcast <16 x i1> %5 to i16
ret i16 %6
}
define zeroext i16 @test_masked_vcmpoeqpd_v2i1_v16i1_mask_mem_b(i2 zeroext %__u, <2 x i64> %__a, double* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqpd_v2i1_v16i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
; VLX-NEXT: kmovb -{{[0-9]+}}(%rsp), %k1
; VLX-NEXT: vcmpeqpd (%rsi){1to2}, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v16i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vmovddup {{.*#+}} xmm2 = mem[0,0]
; NoVLX-NEXT: vcmpeqpd %xmm2, %xmm0, %xmm0
; NoVLX-NEXT: vandpd %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm1, %zmm0, %zmm2
; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x double>
%load = load double, double* %__b
%vec = insertelement <2 x double> undef, double %load, i32 0
%1 = shufflevector <2 x double> %vec, <2 x double> undef, <2 x i32> <i32 0, i32 0>
%2 = fcmp oeq <2 x double> %0, %1
%3 = bitcast i2 %__u to <2 x i1>
%4 = and <2 x i1> %2, %3
%5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%6 = bitcast <16 x i1> %5 to i16
ret i16 %6
}
define zeroext i32 @test_vcmpoeqpd_v2i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vcmpoeqpd_v2i1_v32i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: vcmpeqpd %xmm1, %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1976:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1977:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1978:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x double>
%1 = bitcast <2 x i64> %__b to <2 x double>
%2 = fcmp oeq <2 x double> %0, %1
%3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
define zeroext i32 @test_vcmpoeqpd_v2i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vcmpoeqpd_v2i1_v32i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: vcmpeqpd (%rdi), %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1979:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1980:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1981:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vcmpeqpd (%rdi), %xmm0, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x double>
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <2 x double>
%2 = fcmp oeq <2 x double> %0, %1
%3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
define zeroext i32 @test_vcmpoeqpd_v2i1_v32i1_mask_mem_b(<2 x i64> %__a, double* %__b) local_unnamed_addr {
; VLX-LABEL: test_vcmpoeqpd_v2i1_v32i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: vcmpeqpd (%rdi){1to2}, %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v32i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1982:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1983:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1984:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0]
; NoVLX-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x double>
%load = load double, double* %__b
%vec = insertelement <2 x double> undef, double %load, i32 0
%1 = shufflevector <2 x double> %vec, <2 x double> undef, <2 x i32> <i32 0, i32 0>
%2 = fcmp oeq <2 x double> %0, %1
%3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
define zeroext i32 @test_masked_vcmpoeqpd_v2i1_v32i1_mask(i2 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqpd_v2i1_v32i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
; VLX-NEXT: kmovb -{{[0-9]+}}(%rsp), %k1
; VLX-NEXT: vcmpeqpd %xmm1, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1985:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1986:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1987:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: movb %dil, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vandpd %xmm2, %xmm0, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x double>
%1 = bitcast <2 x i64> %__b to <2 x double>
%2 = fcmp oeq <2 x double> %0, %1
%3 = bitcast i2 %__u to <2 x i1>
%4 = and <2 x i1> %2, %3
%5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
define zeroext i32 @test_masked_vcmpoeqpd_v2i1_v32i1_mask_mem(i2 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqpd_v2i1_v32i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
; VLX-NEXT: kmovb -{{[0-9]+}}(%rsp), %k1
; VLX-NEXT: vcmpeqpd (%rsi), %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1988:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1989:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1990:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: movb %dil, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vcmpeqpd (%rsi), %xmm0, %xmm0
; NoVLX-NEXT: vandpd %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x double>
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <2 x double>
%2 = fcmp oeq <2 x double> %0, %1
%3 = bitcast i2 %__u to <2 x i1>
%4 = and <2 x i1> %2, %3
%5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
define zeroext i32 @test_masked_vcmpoeqpd_v2i1_v32i1_mask_mem_b(i2 zeroext %__u, <2 x i64> %__a, double* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqpd_v2i1_v32i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
; VLX-NEXT: kmovb -{{[0-9]+}}(%rsp), %k1
; VLX-NEXT: vcmpeqpd (%rsi){1to2}, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v32i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1991:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1992:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1993:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: movb %dil, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vmovddup {{.*#+}} xmm2 = mem[0,0]
; NoVLX-NEXT: vcmpeqpd %xmm2, %xmm0, %xmm0
; NoVLX-NEXT: vandpd %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x double>
%load = load double, double* %__b
%vec = insertelement <2 x double> undef, double %load, i32 0
%1 = shufflevector <2 x double> %vec, <2 x double> undef, <2 x i32> <i32 0, i32 0>
%2 = fcmp oeq <2 x double> %0, %1
%3 = bitcast i2 %__u to <2 x i1>
%4 = and <2 x i1> %2, %3
%5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
define zeroext i64 @test_vcmpoeqpd_v2i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vcmpoeqpd_v2i1_v64i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: vcmpeqpd %xmm1, %xmm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1994:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1995:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1996:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x double>
%1 = bitcast <2 x i64> %__b to <2 x double>
%2 = fcmp oeq <2 x double> %0, %1
%3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
define zeroext i64 @test_vcmpoeqpd_v2i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vcmpoeqpd_v2i1_v64i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: vcmpeqpd (%rdi), %xmm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1997:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1998:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1999:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vcmpeqpd (%rdi), %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x double>
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <2 x double>
%2 = fcmp oeq <2 x double> %0, %1
%3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
define zeroext i64 @test_vcmpoeqpd_v2i1_v64i1_mask_mem_b(<2 x i64> %__a, double* %__b) local_unnamed_addr {
; VLX-LABEL: test_vcmpoeqpd_v2i1_v64i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: vcmpeqpd (%rdi){1to2}, %xmm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v64i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi2000:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi2001:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi2002:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0]
; NoVLX-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x double>
%load = load double, double* %__b
%vec = insertelement <2 x double> undef, double %load, i32 0
%1 = shufflevector <2 x double> %vec, <2 x double> undef, <2 x i32> <i32 0, i32 0>
%2 = fcmp oeq <2 x double> %0, %1
%3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
define zeroext i64 @test_masked_vcmpoeqpd_v2i1_v64i1_mask(i2 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqpd_v2i1_v64i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
; VLX-NEXT: kmovb -{{[0-9]+}}(%rsp), %k1
; VLX-NEXT: vcmpeqpd %xmm1, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi2003:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi2004:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi2005:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $96, %rsp
; NoVLX-NEXT: movb %dil, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vandpd %xmm2, %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x double>
%1 = bitcast <2 x i64> %__b to <2 x double>
%2 = fcmp oeq <2 x double> %0, %1
%3 = bitcast i2 %__u to <2 x i1>
%4 = and <2 x i1> %2, %3
%5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
define zeroext i64 @test_masked_vcmpoeqpd_v2i1_v64i1_mask_mem(i2 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqpd_v2i1_v64i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
; VLX-NEXT: kmovb -{{[0-9]+}}(%rsp), %k1
; VLX-NEXT: vcmpeqpd (%rsi), %xmm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi2006:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi2007:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi2008:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $96, %rsp
; NoVLX-NEXT: movb %dil, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vcmpeqpd (%rsi), %xmm0, %xmm0
; NoVLX-NEXT: vandpd %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x double>
%load = load <2 x i64>, <2 x i64>* %__b
%1 = bitcast <2 x i64> %load to <2 x double>
%2 = fcmp oeq <2 x double> %0, %1
%3 = bitcast i2 %__u to <2 x i1>
%4 = and <2 x i1> %2, %3
%5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
define zeroext i64 @test_masked_vcmpoeqpd_v2i1_v64i1_mask_mem_b(i2 zeroext %__u, <2 x i64> %__a, double* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqpd_v2i1_v64i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
; VLX-NEXT: kmovb -{{[0-9]+}}(%rsp), %k1
; VLX-NEXT: vcmpeqpd (%rsi){1to2}, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v64i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi2009:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi2010:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi2011:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $96, %rsp
; NoVLX-NEXT: movb %dil, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vmovddup {{.*#+}} xmm2 = mem[0,0]
; NoVLX-NEXT: vcmpeqpd %xmm2, %xmm0, %xmm0
; NoVLX-NEXT: vandpd %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x double>
%load = load double, double* %__b
%vec = insertelement <2 x double> undef, double %load, i32 0
%1 = shufflevector <2 x double> %vec, <2 x double> undef, <2 x i32> <i32 0, i32 0>
%2 = fcmp oeq <2 x double> %0, %1
%3 = bitcast i2 %__u to <2 x i1>
%4 = and <2 x i1> %2, %3
%5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
define zeroext i8 @test_vcmpoeqpd_v4i1_v8i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vcmpoeqpd_v4i1_v8i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: vcmpeqpd %ymm1, %ymm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vcmpoeqpd_v4i1_v8i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vcmpeqpd %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x double>
%1 = bitcast <4 x i64> %__b to <4 x double>
%2 = fcmp oeq <4 x double> %0, %1
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <8 x i1> %3 to i8
ret i8 %4
}
define zeroext i8 @test_vcmpoeqpd_v4i1_v8i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vcmpoeqpd_v4i1_v8i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: vcmpeqpd (%rdi), %ymm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vcmpoeqpd_v4i1_v8i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vcmpeqpd (%rdi), %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x double>
%load = load <4 x i64>, <4 x i64>* %__b
%1 = bitcast <4 x i64> %load to <4 x double>
%2 = fcmp oeq <4 x double> %0, %1
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <8 x i1> %3 to i8
ret i8 %4
}
define zeroext i8 @test_vcmpoeqpd_v4i1_v8i1_mask_mem_b(<4 x i64> %__a, double* %__b) local_unnamed_addr {
; VLX-LABEL: test_vcmpoeqpd_v4i1_v8i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: vcmpeqpd (%rdi){1to4}, %ymm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vcmpoeqpd_v4i1_v8i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vbroadcastsd (%rdi), %ymm1
; NoVLX-NEXT: vcmpeqpd %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x double>
%load = load double, double* %__b
%vec = insertelement <4 x double> undef, double %load, i32 0
%1 = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
%2 = fcmp oeq <4 x double> %0, %1
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <8 x i1> %3 to i8
ret i8 %4
}
define zeroext i8 @test_masked_vcmpoeqpd_v4i1_v8i1_mask(i4 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqpd_v4i1_v8i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
; VLX-NEXT: kmovb -{{[0-9]+}}(%rsp), %k1
; VLX-NEXT: vcmpeqpd %ymm1, %ymm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vcmpoeqpd_v4i1_v8i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vpmovqd %zmm2, %ymm2
; NoVLX-NEXT: vcmpeqpd %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpand %xmm2, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x double>
%1 = bitcast <4 x i64> %__b to <4 x double>
%2 = fcmp oeq <4 x double> %0, %1
%3 = bitcast i4 %__u to <4 x i1>
%4 = and <4 x i1> %2, %3
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <8 x i1> %5 to i8
ret i8 %6
}
define zeroext i8 @test_masked_vcmpoeqpd_v4i1_v8i1_mask_mem(i4 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqpd_v4i1_v8i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
; VLX-NEXT: kmovb -{{[0-9]+}}(%rsp), %k1
; VLX-NEXT: vcmpeqpd (%rsi), %ymm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vcmpoeqpd_v4i1_v8i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpmovqd %zmm1, %ymm1
; NoVLX-NEXT: vcmpeqpd (%rsi), %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x double>
%load = load <4 x i64>, <4 x i64>* %__b
%1 = bitcast <4 x i64> %load to <4 x double>
%2 = fcmp oeq <4 x double> %0, %1
%3 = bitcast i4 %__u to <4 x i1>
%4 = and <4 x i1> %2, %3
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <8 x i1> %5 to i8
ret i8 %6
}
define zeroext i8 @test_masked_vcmpoeqpd_v4i1_v8i1_mask_mem_b(i4 zeroext %__u, <4 x i64> %__a, double* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqpd_v4i1_v8i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
; VLX-NEXT: kmovb -{{[0-9]+}}(%rsp), %k1
; VLX-NEXT: vcmpeqpd (%rsi){1to4}, %ymm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vcmpoeqpd_v4i1_v8i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpmovqd %zmm1, %ymm1
; NoVLX-NEXT: vbroadcastsd (%rsi), %ymm2
; NoVLX-NEXT: vcmpeqpd %ymm2, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kshiftlw $7, %k0, %k0
; NoVLX-NEXT: kshiftrw $7, %k0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x double>
%load = load double, double* %__b
%vec = insertelement <4 x double> undef, double %load, i32 0
%1 = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
%2 = fcmp oeq <4 x double> %0, %1
%3 = bitcast i4 %__u to <4 x i1>
%4 = and <4 x i1> %2, %3
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <8 x i1> %5 to i8
ret i8 %6
}
define zeroext i16 @test_vcmpoeqpd_v4i1_v16i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vcmpoeqpd_v4i1_v16i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: vcmpeqpd %ymm1, %ymm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vcmpoeqpd_v4i1_v16i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vcmpeqpd %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x double>
%1 = bitcast <4 x i64> %__b to <4 x double>
%2 = fcmp oeq <4 x double> %0, %1
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <16 x i1> %3 to i16
ret i16 %4
}
define zeroext i16 @test_vcmpoeqpd_v4i1_v16i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vcmpoeqpd_v4i1_v16i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: vcmpeqpd (%rdi), %ymm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vcmpoeqpd_v4i1_v16i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vcmpeqpd (%rdi), %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x double>
%load = load <4 x i64>, <4 x i64>* %__b
%1 = bitcast <4 x i64> %load to <4 x double>
%2 = fcmp oeq <4 x double> %0, %1
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <16 x i1> %3 to i16
ret i16 %4
}
define zeroext i16 @test_vcmpoeqpd_v4i1_v16i1_mask_mem_b(<4 x i64> %__a, double* %__b) local_unnamed_addr {
; VLX-LABEL: test_vcmpoeqpd_v4i1_v16i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: vcmpeqpd (%rdi){1to4}, %ymm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vcmpoeqpd_v4i1_v16i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vbroadcastsd (%rdi), %ymm1
; NoVLX-NEXT: vcmpeqpd %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x double>
%load = load double, double* %__b
%vec = insertelement <4 x double> undef, double %load, i32 0
%1 = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
%2 = fcmp oeq <4 x double> %0, %1
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <16 x i1> %3 to i16
ret i16 %4
}
define zeroext i16 @test_masked_vcmpoeqpd_v4i1_v16i1_mask(i4 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqpd_v4i1_v16i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
; VLX-NEXT: kmovb -{{[0-9]+}}(%rsp), %k1
; VLX-NEXT: vcmpeqpd %ymm1, %ymm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vcmpoeqpd_v4i1_v16i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vpmovqd %zmm2, %ymm2
; NoVLX-NEXT: vcmpeqpd %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpand %xmm2, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x double>
%1 = bitcast <4 x i64> %__b to <4 x double>
%2 = fcmp oeq <4 x double> %0, %1
%3 = bitcast i4 %__u to <4 x i1>
%4 = and <4 x i1> %2, %3
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <16 x i1> %5 to i16
ret i16 %6
}
define zeroext i16 @test_masked_vcmpoeqpd_v4i1_v16i1_mask_mem(i4 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqpd_v4i1_v16i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
; VLX-NEXT: kmovb -{{[0-9]+}}(%rsp), %k1
; VLX-NEXT: vcmpeqpd (%rsi), %ymm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vcmpoeqpd_v4i1_v16i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpmovqd %zmm1, %ymm1
; NoVLX-NEXT: vcmpeqpd (%rsi), %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x double>
%load = load <4 x i64>, <4 x i64>* %__b
%1 = bitcast <4 x i64> %load to <4 x double>
%2 = fcmp oeq <4 x double> %0, %1
%3 = bitcast i4 %__u to <4 x i1>
%4 = and <4 x i1> %2, %3
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <16 x i1> %5 to i16
ret i16 %6
}
define zeroext i16 @test_masked_vcmpoeqpd_v4i1_v16i1_mask_mem_b(i4 zeroext %__u, <4 x i64> %__a, double* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqpd_v4i1_v16i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
; VLX-NEXT: kmovb -{{[0-9]+}}(%rsp), %k1
; VLX-NEXT: vcmpeqpd (%rsi){1to4}, %ymm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vcmpoeqpd_v4i1_v16i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpmovqd %zmm1, %ymm1
; NoVLX-NEXT: vbroadcastsd (%rsi), %ymm2
; NoVLX-NEXT: vcmpeqpd %ymm2, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax
; NoVLX-NEXT: andl $1, %eax
; NoVLX-NEXT: kmovw %eax, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kshiftrw $1, %k1, %k1
; NoVLX-NEXT: kshiftlw $1, %k1, %k1
; NoVLX-NEXT: korw %k0, %k1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpextrb $12, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x double>
%load = load double, double* %__b
%vec = insertelement <4 x double> undef, double %load, i32 0
%1 = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
%2 = fcmp oeq <4 x double> %0, %1
%3 = bitcast i4 %__u to <4 x i1>
%4 = and <4 x i1> %2, %3
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <16 x i1> %5 to i16
ret i16 %6
}
define zeroext i32 @test_vcmpoeqpd_v4i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vcmpoeqpd_v4i1_v32i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: vcmpeqpd %ymm1, %ymm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vcmpoeqpd_v4i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi2012:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi2013:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi2014:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vcmpeqpd %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x double>
%1 = bitcast <4 x i64> %__b to <4 x double>
%2 = fcmp oeq <4 x double> %0, %1
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
define zeroext i32 @test_vcmpoeqpd_v4i1_v32i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vcmpoeqpd_v4i1_v32i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: vcmpeqpd (%rdi), %ymm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vcmpoeqpd_v4i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi2015:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi2016:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi2017:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vcmpeqpd (%rdi), %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x double>
%load = load <4 x i64>, <4 x i64>* %__b
%1 = bitcast <4 x i64> %load to <4 x double>
%2 = fcmp oeq <4 x double> %0, %1
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
define zeroext i32 @test_vcmpoeqpd_v4i1_v32i1_mask_mem_b(<4 x i64> %__a, double* %__b) local_unnamed_addr {
; VLX-LABEL: test_vcmpoeqpd_v4i1_v32i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: vcmpeqpd (%rdi){1to4}, %ymm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vcmpoeqpd_v4i1_v32i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi2018:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi2019:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi2020:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vbroadcastsd (%rdi), %ymm1
; NoVLX-NEXT: vcmpeqpd %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x double>
%load = load double, double* %__b
%vec = insertelement <4 x double> undef, double %load, i32 0
%1 = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
%2 = fcmp oeq <4 x double> %0, %1
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
define zeroext i32 @test_masked_vcmpoeqpd_v4i1_v32i1_mask(i4 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqpd_v4i1_v32i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
; VLX-NEXT: kmovb -{{[0-9]+}}(%rsp), %k1
; VLX-NEXT: vcmpeqpd %ymm1, %ymm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vcmpoeqpd_v4i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi2021:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi2022:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi2023:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: movb %dil, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vpmovqd %zmm2, %ymm2
; NoVLX-NEXT: vcmpeqpd %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpand %xmm2, %xmm0, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x double>
%1 = bitcast <4 x i64> %__b to <4 x double>
%2 = fcmp oeq <4 x double> %0, %1
%3 = bitcast i4 %__u to <4 x i1>
%4 = and <4 x i1> %2, %3
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
define zeroext i32 @test_masked_vcmpoeqpd_v4i1_v32i1_mask_mem(i4 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqpd_v4i1_v32i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
; VLX-NEXT: kmovb -{{[0-9]+}}(%rsp), %k1
; VLX-NEXT: vcmpeqpd (%rsi), %ymm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vcmpoeqpd_v4i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi2024:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi2025:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi2026:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: movb %dil, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpmovqd %zmm1, %ymm1
; NoVLX-NEXT: vcmpeqpd (%rsi), %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x double>
%load = load <4 x i64>, <4 x i64>* %__b
%1 = bitcast <4 x i64> %load to <4 x double>
%2 = fcmp oeq <4 x double> %0, %1
%3 = bitcast i4 %__u to <4 x i1>
%4 = and <4 x i1> %2, %3
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
define zeroext i32 @test_masked_vcmpoeqpd_v4i1_v32i1_mask_mem_b(i4 zeroext %__u, <4 x i64> %__a, double* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqpd_v4i1_v32i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
; VLX-NEXT: kmovb -{{[0-9]+}}(%rsp), %k1
; VLX-NEXT: vcmpeqpd (%rsi){1to4}, %ymm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vcmpoeqpd_v4i1_v32i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi2027:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi2028:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi2029:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: movb %dil, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpmovqd %zmm1, %ymm1
; NoVLX-NEXT: vbroadcastsd (%rsi), %ymm2
; NoVLX-NEXT: vcmpeqpd %ymm2, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x double>
%load = load double, double* %__b
%vec = insertelement <4 x double> undef, double %load, i32 0
%1 = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
%2 = fcmp oeq <4 x double> %0, %1
%3 = bitcast i4 %__u to <4 x i1>
%4 = and <4 x i1> %2, %3
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
define zeroext i64 @test_vcmpoeqpd_v4i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vcmpoeqpd_v4i1_v64i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: vcmpeqpd %ymm1, %ymm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vcmpoeqpd_v4i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi2030:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi2031:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi2032:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vcmpeqpd %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x double>
%1 = bitcast <4 x i64> %__b to <4 x double>
%2 = fcmp oeq <4 x double> %0, %1
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
define zeroext i64 @test_vcmpoeqpd_v4i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vcmpoeqpd_v4i1_v64i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: vcmpeqpd (%rdi), %ymm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vcmpoeqpd_v4i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi2033:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi2034:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi2035:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vcmpeqpd (%rdi), %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x double>
%load = load <4 x i64>, <4 x i64>* %__b
%1 = bitcast <4 x i64> %load to <4 x double>
%2 = fcmp oeq <4 x double> %0, %1
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
define zeroext i64 @test_vcmpoeqpd_v4i1_v64i1_mask_mem_b(<4 x i64> %__a, double* %__b) local_unnamed_addr {
; VLX-LABEL: test_vcmpoeqpd_v4i1_v64i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: vcmpeqpd (%rdi){1to4}, %ymm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vcmpoeqpd_v4i1_v64i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi2036:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi2037:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi2038:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vbroadcastsd (%rdi), %ymm1
; NoVLX-NEXT: vcmpeqpd %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x double>
%load = load double, double* %__b
%vec = insertelement <4 x double> undef, double %load, i32 0
%1 = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
%2 = fcmp oeq <4 x double> %0, %1
%3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
define zeroext i64 @test_masked_vcmpoeqpd_v4i1_v64i1_mask(i4 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqpd_v4i1_v64i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
; VLX-NEXT: kmovb -{{[0-9]+}}(%rsp), %k1
; VLX-NEXT: vcmpeqpd %ymm1, %ymm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vcmpoeqpd_v4i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi2039:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi2040:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi2041:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $96, %rsp
; NoVLX-NEXT: movb %dil, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vpmovqd %zmm2, %ymm2
; NoVLX-NEXT: vcmpeqpd %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpand %xmm2, %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x double>
%1 = bitcast <4 x i64> %__b to <4 x double>
%2 = fcmp oeq <4 x double> %0, %1
%3 = bitcast i4 %__u to <4 x i1>
%4 = and <4 x i1> %2, %3
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
define zeroext i64 @test_masked_vcmpoeqpd_v4i1_v64i1_mask_mem(i4 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqpd_v4i1_v64i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
; VLX-NEXT: kmovb -{{[0-9]+}}(%rsp), %k1
; VLX-NEXT: vcmpeqpd (%rsi), %ymm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vcmpoeqpd_v4i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi2042:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi2043:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi2044:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $96, %rsp
; NoVLX-NEXT: movb %dil, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpmovqd %zmm1, %ymm1
; NoVLX-NEXT: vcmpeqpd (%rsi), %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x double>
%load = load <4 x i64>, <4 x i64>* %__b
%1 = bitcast <4 x i64> %load to <4 x double>
%2 = fcmp oeq <4 x double> %0, %1
%3 = bitcast i4 %__u to <4 x i1>
%4 = and <4 x i1> %2, %3
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
define zeroext i64 @test_masked_vcmpoeqpd_v4i1_v64i1_mask_mem_b(i4 zeroext %__u, <4 x i64> %__a, double* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqpd_v4i1_v64i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
; VLX-NEXT: kmovb -{{[0-9]+}}(%rsp), %k1
; VLX-NEXT: vcmpeqpd (%rsi){1to4}, %ymm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vcmpoeqpd_v4i1_v64i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi2045:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi2046:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi2047:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $96, %rsp
; NoVLX-NEXT: movb %dil, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT: vpmovqd %zmm1, %ymm1
; NoVLX-NEXT: vbroadcastsd (%rsi), %ymm2
; NoVLX-NEXT: vcmpeqpd %ymm2, %ymm0, %ymm0
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x double>
%load = load double, double* %__b
%vec = insertelement <4 x double> undef, double %load, i32 0
%1 = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
%2 = fcmp oeq <4 x double> %0, %1
%3 = bitcast i4 %__u to <4 x i1>
%4 = and <4 x i1> %2, %3
%5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
define zeroext i16 @test_vcmpoeqpd_v8i1_v16i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vcmpoeqpd_v8i1_v16i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vcmpoeqpd_v8i1_v16i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x double>
%1 = bitcast <8 x i64> %__b to <8 x double>
%2 = fcmp oeq <8 x double> %0, %1
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%4 = bitcast <16 x i1> %3 to i16
ret i16 %4
}
define zeroext i16 @test_vcmpoeqpd_v8i1_v16i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vcmpoeqpd_v8i1_v16i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: vcmpeqpd (%rdi), %zmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vcmpoeqpd_v8i1_v16i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vcmpeqpd (%rdi), %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x double>
%load = load <8 x i64>, <8 x i64>* %__b
%1 = bitcast <8 x i64> %load to <8 x double>
%2 = fcmp oeq <8 x double> %0, %1
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%4 = bitcast <16 x i1> %3 to i16
ret i16 %4
}
define zeroext i16 @test_vcmpoeqpd_v8i1_v16i1_mask_mem_b(<8 x i64> %__a, double* %__b) local_unnamed_addr {
; VLX-LABEL: test_vcmpoeqpd_v8i1_v16i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: vcmpeqpd (%rdi){1to8}, %zmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vcmpoeqpd_v8i1_v16i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vcmpeqpd (%rdi){1to8}, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x double>
%load = load double, double* %__b
%vec = insertelement <8 x double> undef, double %load, i32 0
%1 = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
%2 = fcmp oeq <8 x double> %0, %1
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%4 = bitcast <16 x i1> %3 to i16
ret i16 %4
}
define zeroext i16 @test_masked_vcmpoeqpd_v8i1_v16i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqpd_v8i1_v16i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vcmpoeqpd_v8i1_v16i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x double>
%1 = bitcast <8 x i64> %__b to <8 x double>
%2 = fcmp oeq <8 x double> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %2, %3
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%6 = bitcast <16 x i1> %5 to i16
ret i16 %6
}
define zeroext i16 @test_masked_vcmpoeqpd_v8i1_v16i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqpd_v8i1_v16i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vcmpeqpd (%rsi), %zmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vcmpoeqpd_v8i1_v16i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vcmpeqpd (%rsi), %zmm0, %k0 {%k1}
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x double>
%load = load <8 x i64>, <8 x i64>* %__b
%1 = bitcast <8 x i64> %load to <8 x double>
%2 = fcmp oeq <8 x double> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %2, %3
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%6 = bitcast <16 x i1> %5 to i16
ret i16 %6
}
define zeroext i16 @test_masked_vcmpoeqpd_v8i1_v16i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, double* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqpd_v8i1_v16i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vcmpeqpd (%rsi){1to8}, %zmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vcmpoeqpd_v8i1_v16i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vcmpeqpd (%rsi){1to8}, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x double>
%load = load double, double* %__b
%vec = insertelement <8 x double> undef, double %load, i32 0
%1 = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
%2 = fcmp oeq <8 x double> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %2, %3
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%6 = bitcast <16 x i1> %5 to i16
ret i16 %6
}
define zeroext i16 @test_vcmpoeqpd_v8i1_v16i1_sae_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vcmpoeqpd_v8i1_v16i1_sae_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: vcmplepd {sae}, %zmm1, %zmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: movzbl %al, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vcmpoeqpd_v8i1_v16i1_sae_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vcmplepd {sae}, %zmm1, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: movzbl %al, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x double>
%1 = bitcast <8 x i64> %__b to <8 x double>
%2 = call i8 @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %0, <8 x double> %1, i32 2, i8 -1, i32 8)
%3 = zext i8 %2 to i16
ret i16 %3
}
define zeroext i16 @test_masked_vcmpoeqpd_v8i1_v16i1_sae_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqpd_v8i1_v16i1_sae_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vcmplepd {sae}, %zmm1, %zmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: movzbl %al, %eax
; VLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vcmpoeqpd_v8i1_v16i1_sae_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vcmplepd {sae}, %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: movzbl %al, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x double>
%1 = bitcast <8 x i64> %__b to <8 x double>
%2 = call i8 @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %0, <8 x double> %1, i32 2, i8 %__u, i32 8)
%3 = zext i8 %2 to i16
ret i16 %3
}
define zeroext i32 @test_vcmpoeqpd_v8i1_v32i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vcmpoeqpd_v8i1_v32i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vcmpoeqpd_v8i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi2048:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi2049:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi2050:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x double>
%1 = bitcast <8 x i64> %__b to <8 x double>
%2 = fcmp oeq <8 x double> %0, %1
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
define zeroext i32 @test_vcmpoeqpd_v8i1_v32i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vcmpoeqpd_v8i1_v32i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: vcmpeqpd (%rdi), %zmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vcmpoeqpd_v8i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi2051:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi2052:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi2053:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vcmpeqpd (%rdi), %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x double>
%load = load <8 x i64>, <8 x i64>* %__b
%1 = bitcast <8 x i64> %load to <8 x double>
%2 = fcmp oeq <8 x double> %0, %1
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
define zeroext i32 @test_vcmpoeqpd_v8i1_v32i1_mask_mem_b(<8 x i64> %__a, double* %__b) local_unnamed_addr {
; VLX-LABEL: test_vcmpoeqpd_v8i1_v32i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: vcmpeqpd (%rdi){1to8}, %zmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vcmpoeqpd_v8i1_v32i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi2054:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi2055:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi2056:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vcmpeqpd (%rdi){1to8}, %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x double>
%load = load double, double* %__b
%vec = insertelement <8 x double> undef, double %load, i32 0
%1 = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
%2 = fcmp oeq <8 x double> %0, %1
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%4 = bitcast <32 x i1> %3 to i32
ret i32 %4
}
define zeroext i32 @test_masked_vcmpoeqpd_v8i1_v32i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqpd_v8i1_v32i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vcmpoeqpd_v8i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi2057:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi2058:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi2059:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x double>
%1 = bitcast <8 x i64> %__b to <8 x double>
%2 = fcmp oeq <8 x double> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %2, %3
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
define zeroext i32 @test_masked_vcmpoeqpd_v8i1_v32i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqpd_v8i1_v32i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vcmpeqpd (%rsi), %zmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vcmpoeqpd_v8i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi2060:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi2061:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi2062:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vcmpeqpd (%rsi), %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x double>
%load = load <8 x i64>, <8 x i64>* %__b
%1 = bitcast <8 x i64> %load to <8 x double>
%2 = fcmp oeq <8 x double> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %2, %3
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
define zeroext i32 @test_masked_vcmpoeqpd_v8i1_v32i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, double* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqpd_v8i1_v32i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vcmpeqpd (%rsi){1to8}, %zmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vcmpoeqpd_v8i1_v32i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi2063:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi2064:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi2065:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vcmpeqpd (%rsi){1to8}, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x double>
%load = load double, double* %__b
%vec = insertelement <8 x double> undef, double %load, i32 0
%1 = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
%2 = fcmp oeq <8 x double> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %2, %3
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%6 = bitcast <32 x i1> %5 to i32
ret i32 %6
}
define zeroext i32 @test_vcmpoeqpd_v8i1_v32i1_sae_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vcmpoeqpd_v8i1_v32i1_sae_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: vcmplepd {sae}, %zmm1, %zmm0, %k0
; VLX-NEXT: kmovb %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vcmpoeqpd_v8i1_v32i1_sae_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vcmplepd {sae}, %zmm1, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: movzbl %al, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x double>
%1 = bitcast <8 x i64> %__b to <8 x double>
%2 = call i8 @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %0, <8 x double> %1, i32 2, i8 -1, i32 8)
%3 = zext i8 %2 to i32
ret i32 %3
}
define zeroext i32 @test_masked_vcmpoeqpd_v8i1_v32i1_sae_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqpd_v8i1_v32i1_sae_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vcmplepd {sae}, %zmm1, %zmm0, %k0 {%k1}
; VLX-NEXT: kmovb %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vcmpoeqpd_v8i1_v32i1_sae_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vcmplepd {sae}, %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: movzbl %al, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x double>
%1 = bitcast <8 x i64> %__b to <8 x double>
%2 = call i8 @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %0, <8 x double> %1, i32 2, i8 %__u, i32 8)
%3 = zext i8 %2 to i32
ret i32 %3
}
define zeroext i64 @test_vcmpoeqpd_v8i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vcmpoeqpd_v8i1_v64i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vcmpoeqpd_v8i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi2066:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi2067:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi2068:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x double>
%1 = bitcast <8 x i64> %__b to <8 x double>
%2 = fcmp oeq <8 x double> %0, %1
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
define zeroext i64 @test_vcmpoeqpd_v8i1_v64i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vcmpoeqpd_v8i1_v64i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: vcmpeqpd (%rdi), %zmm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vcmpoeqpd_v8i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi2069:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi2070:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi2071:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vcmpeqpd (%rdi), %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x double>
%load = load <8 x i64>, <8 x i64>* %__b
%1 = bitcast <8 x i64> %load to <8 x double>
%2 = fcmp oeq <8 x double> %0, %1
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
define zeroext i64 @test_vcmpoeqpd_v8i1_v64i1_mask_mem_b(<8 x i64> %__a, double* %__b) local_unnamed_addr {
; VLX-LABEL: test_vcmpoeqpd_v8i1_v64i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: vcmpeqpd (%rdi){1to8}, %zmm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vcmpoeqpd_v8i1_v64i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi2072:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi2073:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi2074:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vcmpeqpd (%rdi){1to8}, %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x double>
%load = load double, double* %__b
%vec = insertelement <8 x double> undef, double %load, i32 0
%1 = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
%2 = fcmp oeq <8 x double> %0, %1
%3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%4 = bitcast <64 x i1> %3 to i64
ret i64 %4
}
define zeroext i64 @test_masked_vcmpoeqpd_v8i1_v64i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqpd_v8i1_v64i1_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vcmpoeqpd_v8i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi2075:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi2076:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi2077:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x double>
%1 = bitcast <8 x i64> %__b to <8 x double>
%2 = fcmp oeq <8 x double> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %2, %3
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
define zeroext i64 @test_masked_vcmpoeqpd_v8i1_v64i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqpd_v8i1_v64i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vcmpeqpd (%rsi), %zmm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vcmpoeqpd_v8i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi2078:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi2079:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi2080:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vcmpeqpd (%rsi), %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x double>
%load = load <8 x i64>, <8 x i64>* %__b
%1 = bitcast <8 x i64> %load to <8 x double>
%2 = fcmp oeq <8 x double> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %2, %3
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
define zeroext i64 @test_masked_vcmpoeqpd_v8i1_v64i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, double* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqpd_v8i1_v64i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vcmpeqpd (%rsi){1to8}, %zmm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vcmpoeqpd_v8i1_v64i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi2081:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi2082:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi2083:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vcmpeqpd (%rsi){1to8}, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x double>
%load = load double, double* %__b
%vec = insertelement <8 x double> undef, double %load, i32 0
%1 = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
%2 = fcmp oeq <8 x double> %0, %1
%3 = bitcast i8 %__u to <8 x i1>
%4 = and <8 x i1> %2, %3
%5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%6 = bitcast <64 x i1> %5 to i64
ret i64 %6
}
define zeroext i64 @test_vcmpoeqpd_v8i1_v64i1_sae_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vcmpoeqpd_v8i1_v64i1_sae_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: vcmplepd {sae}, %zmm1, %zmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: movzbl %al, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vcmpoeqpd_v8i1_v64i1_sae_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vcmplepd {sae}, %zmm1, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: movzbl %al, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x double>
%1 = bitcast <8 x i64> %__b to <8 x double>
%2 = call i8 @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %0, <8 x double> %1, i32 2, i8 -1, i32 8)
%3 = zext i8 %2 to i64
ret i64 %3
}
define zeroext i64 @test_masked_vcmpoeqpd_v8i1_v64i1_sae_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqpd_v8i1_v64i1_sae_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vcmplepd {sae}, %zmm1, %zmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: movzbl %al, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vcmpoeqpd_v8i1_v64i1_sae_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vcmplepd {sae}, %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: movzbl %al, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x double>
%1 = bitcast <8 x i64> %__b to <8 x double>
%2 = call i8 @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %0, <8 x double> %1, i32 2, i8 %__u, i32 8)
%3 = zext i8 %2 to i64
ret i64 %3
}